From 4c2bb43775947775401cbb9d774823c5723a91f8 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Thu, 18 Aug 2022 13:37:28 +0100 Subject: [PATCH] Reformat all python files by black & isort --- scripts/coverage | 13 +- scripts/export_import_between_pageservers.py | 347 +++--- scripts/generate_perf_report_page.py | 152 +-- scripts/git-upload | 11 +- scripts/ingest_perf_test_result.py | 82 +- .../batch_others/test_ancestor_branch.py | 71 +- test_runner/batch_others/test_auth.py | 42 +- test_runner/batch_others/test_backpressure.py | 59 +- .../batch_others/test_basebackup_error.py | 3 +- .../batch_others/test_branch_and_gc.py | 101 +- .../batch_others/test_branch_behind.py | 92 +- test_runner/batch_others/test_branching.py | 58 +- .../batch_others/test_broken_timeline.py | 33 +- .../batch_others/test_clog_truncate.py | 55 +- test_runner/batch_others/test_close_fds.py | 28 +- test_runner/batch_others/test_config.py | 14 +- .../batch_others/test_crafted_wal_end.py | 72 +- test_runner/batch_others/test_createdropdb.py | 62 +- test_runner/batch_others/test_createuser.py | 18 +- test_runner/batch_others/test_fsm_truncate.py | 7 +- test_runner/batch_others/test_fullbackup.py | 49 +- .../batch_others/test_gc_aggressive.py | 20 +- test_runner/batch_others/test_import.py | 168 +-- test_runner/batch_others/test_large_schema.py | 19 +- test_runner/batch_others/test_lsn_mapping.py | 32 +- test_runner/batch_others/test_multixact.py | 31 +- test_runner/batch_others/test_neon_cli.py | 35 +- test_runner/batch_others/test_next_xid.py | 12 +- test_runner/batch_others/test_normal_work.py | 24 +- .../batch_others/test_old_request_lsn.py | 30 +- .../batch_others/test_pageserver_api.py | 138 ++- .../batch_others/test_pageserver_catchup.py | 27 +- .../batch_others/test_pageserver_restart.py | 22 +- .../batch_others/test_parallel_copy.py | 14 +- test_runner/batch_others/test_pitr_gc.py | 36 +- test_runner/batch_others/test_proxy.py | 23 +- .../batch_others/test_read_validation.py | 57 +- .../batch_others/test_readonly_node.py | 78 +- test_runner/batch_others/test_recovery.py | 15 +- .../batch_others/test_remote_storage.py | 69 +- test_runner/batch_others/test_subxacts.py | 22 +- test_runner/batch_others/test_tenant_conf.py | 65 +- .../batch_others/test_tenant_detach.py | 38 +- .../batch_others/test_tenant_relocation.py | 179 +-- test_runner/batch_others/test_tenant_tasks.py | 7 +- test_runner/batch_others/test_tenants.py | 52 +- .../test_tenants_with_remote_storage.py | 30 +- .../batch_others/test_timeline_delete.py | 47 +- .../batch_others/test_timeline_size.py | 230 ++-- test_runner/batch_others/test_twophase.py | 30 +- test_runner/batch_others/test_vm_bits.py | 54 +- test_runner/batch_others/test_wal_acceptor.py | 418 ++++--- .../batch_others/test_wal_acceptor_async.py | 267 ++-- test_runner/batch_others/test_wal_restore.py | 46 +- .../batch_pg_regress/test_isolation.py | 35 +- .../batch_pg_regress/test_neon_regress.py | 43 +- .../batch_pg_regress/test_pg_regress.py | 39 +- test_runner/conftest.py | 12 +- test_runner/fixtures/benchmark_fixture.py | 161 ++- test_runner/fixtures/compare_fixtures.py | 105 +- test_runner/fixtures/log_helper.py | 15 +- test_runner/fixtures/metrics.py | 8 +- test_runner/fixtures/neon_fixtures.py | 1093 +++++++++-------- test_runner/fixtures/pg_stats.py | 36 +- test_runner/fixtures/slow.py | 1 + test_runner/fixtures/utils.py | 42 +- .../performance/test_branch_creation.py | 65 +- test_runner/performance/test_bulk_insert.py | 11 +- .../performance/test_bulk_tenant_create.py | 28 +- .../performance/test_compare_pg_stats.py | 79 +- test_runner/performance/test_copy.py | 31 +- test_runner/performance/test_dup_key.py | 26 +- test_runner/performance/test_gist_build.py | 9 +- test_runner/performance/test_hot_page.py | 24 +- test_runner/performance/test_hot_table.py | 20 +- .../performance/test_parallel_copy_to.py | 28 +- test_runner/performance/test_perf_pgbench.py | 83 +- test_runner/performance/test_random_writes.py | 35 +- test_runner/performance/test_seqscans.py | 30 +- test_runner/performance/test_startup.py | 17 +- .../performance/test_wal_backpressure.py | 146 ++- .../performance/test_write_amplification.py | 15 +- test_runner/pg_clients/test_pg_clients.py | 12 +- test_runner/test_broken.py | 16 +- 84 files changed, 3282 insertions(+), 2687 deletions(-) diff --git a/scripts/coverage b/scripts/coverage index f2c46d9ae9..af0d067419 100755 --- a/scripts/coverage +++ b/scripts/coverage @@ -9,13 +9,6 @@ # * https://github.com/taiki-e/cargo-llvm-cov # * https://github.com/llvm/llvm-project/tree/main/llvm/test/tools/llvm-cov -from abc import ABC, abstractmethod -from dataclasses import dataclass -from pathlib import Path -from tempfile import TemporaryDirectory -from textwrap import dedent -from typing import Any, Dict, Iterator, Iterable, List, Optional - import argparse import hashlib import json @@ -24,6 +17,12 @@ import shutil import socket import subprocess import sys +from abc import ABC, abstractmethod +from dataclasses import dataclass +from pathlib import Path +from tempfile import TemporaryDirectory +from textwrap import dedent +from typing import Any, Dict, Iterable, Iterator, List, Optional def file_mtime_or_zero(path: Path) -> int: diff --git a/scripts/export_import_between_pageservers.py b/scripts/export_import_between_pageservers.py index 96f1d36ddb..5b9fc76768 100755 --- a/scripts/export_import_between_pageservers.py +++ b/scripts/export_import_between_pageservers.py @@ -20,20 +20,21 @@ # For more context on how to use this, see: # https://github.com/neondatabase/cloud/wiki/Storage-format-migration -import os -from os import path -import shutil -from pathlib import Path -import tempfile -from contextlib import closing -import psycopg2 -import subprocess import argparse +import os +import shutil +import subprocess +import tempfile import time -import requests import uuid +from contextlib import closing +from os import path +from pathlib import Path +from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypeVar, Union, cast + +import psycopg2 +import requests from psycopg2.extensions import connection as PgConnection -from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple ############################################### ### client-side utils copied from test fixtures @@ -45,7 +46,7 @@ _global_counter = 0 def global_counter() -> int: - """ A really dumb global counter. + """A really dumb global counter. This is useful for giving output files a unique number, so if we run the same command multiple times we can keep their output separate. """ @@ -55,7 +56,7 @@ def global_counter() -> int: def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str: - """ Run a process and capture its output + """Run a process and capture its output Output will go to files named "cmd_NNN.stdout" and "cmd_NNN.stderr" where "cmd" is the name of the program and NNN is an incrementing counter. @@ -63,13 +64,13 @@ def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str: Returns basepath for files with captured output. """ assert type(cmd) is list - base = os.path.basename(cmd[0]) + '_{}'.format(global_counter()) + base = os.path.basename(cmd[0]) + "_{}".format(global_counter()) basepath = os.path.join(capture_dir, base) - stdout_filename = basepath + '.stdout' - stderr_filename = basepath + '.stderr' + stdout_filename = basepath + ".stdout" + stderr_filename = basepath + ".stderr" - with open(stdout_filename, 'w') as stdout_f: - with open(stderr_filename, 'w') as stderr_f: + with open(stdout_filename, "w") as stdout_f: + with open(stderr_filename, "w") as stderr_f: print('(capturing output to "{}.stdout")'.format(base)) subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f) @@ -77,15 +78,16 @@ def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str: class PgBin: - """ A helper class for executing postgres binaries """ + """A helper class for executing postgres binaries""" + def __init__(self, log_dir: Path, pg_distrib_dir): self.log_dir = log_dir - self.pg_bin_path = os.path.join(str(pg_distrib_dir), 'bin') + self.pg_bin_path = os.path.join(str(pg_distrib_dir), "bin") self.env = os.environ.copy() - self.env['LD_LIBRARY_PATH'] = os.path.join(str(pg_distrib_dir), 'lib') + self.env["LD_LIBRARY_PATH"] = os.path.join(str(pg_distrib_dir), "lib") def _fixpath(self, command: List[str]): - if '/' not in command[0]: + if "/" not in command[0]: command[0] = os.path.join(self.pg_bin_path, command[0]) def _build_env(self, env_add: Optional[Env]) -> Env: @@ -106,15 +108,17 @@ class PgBin: """ self._fixpath(command) - print('Running command "{}"'.format(' '.join(command))) + print('Running command "{}"'.format(" ".join(command))) env = self._build_env(env) subprocess.run(command, env=env, cwd=cwd, check=True) - def run_capture(self, - command: List[str], - env: Optional[Env] = None, - cwd: Optional[str] = None, - **kwargs: Any) -> str: + def run_capture( + self, + command: List[str], + env: Optional[Env] = None, + cwd: Optional[str] = None, + **kwargs: Any, + ) -> str: """ Run one of the postgres binaries, with stderr and stdout redirected to a file. This is just like `run`, but for chatty programs. Returns basepath for files @@ -122,35 +126,33 @@ class PgBin: """ self._fixpath(command) - print('Running command "{}"'.format(' '.join(command))) + print('Running command "{}"'.format(" ".join(command))) env = self._build_env(env) - return subprocess_capture(str(self.log_dir), - command, - env=env, - cwd=cwd, - check=True, - **kwargs) + return subprocess_capture( + str(self.log_dir), command, env=env, cwd=cwd, check=True, **kwargs + ) class PgProtocol: - """ Reusable connection logic """ + """Reusable connection logic""" + def __init__(self, **kwargs): self.default_options = kwargs def conn_options(self, **kwargs): conn_options = self.default_options.copy() - if 'dsn' in kwargs: - conn_options.update(parse_dsn(kwargs['dsn'])) + if "dsn" in kwargs: + conn_options.update(parse_dsn(kwargs["dsn"])) conn_options.update(kwargs) # Individual statement timeout in seconds. 2 minutes should be # enough for our tests, but if you need a longer, you can # change it by calling "SET statement_timeout" after # connecting. - if 'options' in conn_options: - conn_options['options'] = f"-cstatement_timeout=120s " + conn_options['options'] + if "options" in conn_options: + conn_options["options"] = f"-cstatement_timeout=120s " + conn_options["options"] else: - conn_options['options'] = "-cstatement_timeout=120s" + conn_options["options"] = "-cstatement_timeout=120s" return conn_options # autocommit=True here by default because that's what we need most of the time @@ -194,18 +196,18 @@ class PgProtocol: class VanillaPostgres(PgProtocol): def __init__(self, pgdatadir: Path, pg_bin: PgBin, port: int, init=True): - super().__init__(host='localhost', port=port, dbname='postgres') + super().__init__(host="localhost", port=port, dbname="postgres") self.pgdatadir = pgdatadir self.pg_bin = pg_bin self.running = False if init: - self.pg_bin.run_capture(['initdb', '-D', str(pgdatadir)]) + self.pg_bin.run_capture(["initdb", "-D", str(pgdatadir)]) self.configure([f"port = {port}\n"]) def configure(self, options: List[str]): """Append lines into postgresql.conf file.""" assert not self.running - with open(os.path.join(self.pgdatadir, 'postgresql.conf'), 'a') as conf_file: + with open(os.path.join(self.pgdatadir, "postgresql.conf"), "a") as conf_file: conf_file.write("\n".join(options)) def start(self, log_path: Optional[str] = None): @@ -216,12 +218,13 @@ class VanillaPostgres(PgProtocol): log_path = os.path.join(self.pgdatadir, "pg.log") self.pg_bin.run_capture( - ['pg_ctl', '-w', '-D', str(self.pgdatadir), '-l', log_path, 'start']) + ["pg_ctl", "-w", "-D", str(self.pgdatadir), "-l", log_path, "start"] + ) def stop(self): assert self.running self.running = False - self.pg_bin.run_capture(['pg_ctl', '-w', '-D', str(self.pgdatadir), 'stop']) + self.pg_bin.run_capture(["pg_ctl", "-w", "-D", str(self.pgdatadir), "stop"]) def __enter__(self): return self @@ -246,9 +249,9 @@ class NeonPageserverHttpClient(requests.Session): res.raise_for_status() except requests.RequestException as e: try: - msg = res.json()['msg'] + msg = res.json()["msg"] except: - msg = '' + msg = "" raise NeonPageserverApiException(msg) from e def check_status(self): @@ -265,17 +268,17 @@ class NeonPageserverHttpClient(requests.Session): res = self.post( f"http://{self.host}:{self.port}/v1/tenant", json={ - 'new_tenant_id': new_tenant_id.hex, + "new_tenant_id": new_tenant_id.hex, }, ) if res.status_code == 409: if ok_if_exists: - print(f'could not create tenant: already exists for id {new_tenant_id}') + print(f"could not create tenant: already exists for id {new_tenant_id}") else: res.raise_for_status() elif res.status_code == 201: - print(f'created tenant {new_tenant_id}') + print(f"created tenant {new_tenant_id}") else: self.verbose_error(res) @@ -299,47 +302,55 @@ class NeonPageserverHttpClient(requests.Session): def lsn_to_hex(num: int) -> str: - """ Convert lsn from int to standard hex notation. """ - return "{:X}/{:X}".format(num >> 32, num & 0xffffffff) + """Convert lsn from int to standard hex notation.""" + return "{:X}/{:X}".format(num >> 32, num & 0xFFFFFFFF) def lsn_from_hex(lsn_hex: str) -> int: - """ Convert lsn from hex notation to int. """ - l, r = lsn_hex.split('/') + """Convert lsn from hex notation to int.""" + l, r = lsn_hex.split("/") return (int(l, 16) << 32) + int(r, 16) -def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID) -> int: +def remote_consistent_lsn( + pageserver_http_client: NeonPageserverHttpClient, tenant: uuid.UUID, timeline: uuid.UUID +) -> int: detail = pageserver_http_client.timeline_detail(tenant, timeline) - if detail['remote'] is None: + if detail["remote"] is None: # No remote information at all. This happens right after creating # a timeline, before any part of it has been uploaded to remote # storage yet. return 0 else: - lsn_str = detail['remote']['remote_consistent_lsn'] + lsn_str = detail["remote"]["remote_consistent_lsn"] assert isinstance(lsn_str, str) return lsn_from_hex(lsn_str) -def wait_for_upload(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID, - lsn: int): +def wait_for_upload( + pageserver_http_client: NeonPageserverHttpClient, + tenant: uuid.UUID, + timeline: uuid.UUID, + lsn: int, +): """waits for local timeline upload up to specified lsn""" for i in range(10): current_lsn = remote_consistent_lsn(pageserver_http_client, tenant, timeline) if current_lsn >= lsn: return - print("waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format( - lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1)) + print( + "waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format( + lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1 + ) + ) time.sleep(1) - raise Exception("timed out while waiting for remote_consistent_lsn to reach {}, was {}".format( - lsn_to_hex(lsn), lsn_to_hex(current_lsn))) + raise Exception( + "timed out while waiting for remote_consistent_lsn to reach {}, was {}".format( + lsn_to_hex(lsn), lsn_to_hex(current_lsn) + ) + ) ############## @@ -399,7 +410,7 @@ def reconstruct_paths(log_dir, pg_bin, base_tar): # Add all template0copy paths to template0 prefix = f"base/{oid}/" if filepath.startswith(prefix): - suffix = filepath[len(prefix):] + suffix = filepath[len(prefix) :] yield f"base/{template0_oid}/{suffix}" elif filepath.startswith("global"): print(f"skipping {database} global file {filepath}") @@ -451,15 +462,17 @@ def get_rlsn(pageserver_connstr, tenant_id, timeline_id): return last_lsn, prev_lsn -def import_timeline(args, - psql_path, - pageserver_connstr, - pageserver_http, - tenant_id, - timeline_id, - last_lsn, - prev_lsn, - tar_filename): +def import_timeline( + args, + psql_path, + pageserver_connstr, + pageserver_http, + tenant_id, + timeline_id, + last_lsn, + prev_lsn, + tar_filename, +): # Import timelines to new pageserver import_cmd = f"import basebackup {tenant_id} {timeline_id} {last_lsn} {last_lsn}" full_cmd = rf"""cat {tar_filename} | {psql_path} {pageserver_connstr} -c '{import_cmd}' """ @@ -469,34 +482,30 @@ def import_timeline(args, print(f"Running: {full_cmd}") - with open(stdout_filename, 'w') as stdout_f: - with open(stderr_filename2, 'w') as stderr_f: + with open(stdout_filename, "w") as stdout_f: + with open(stderr_filename2, "w") as stderr_f: print(f"(capturing output to {stdout_filename})") pg_bin = PgBin(args.work_dir, args.pg_distrib_dir) - subprocess.run(full_cmd, - stdout=stdout_f, - stderr=stderr_f, - env=pg_bin._build_env(None), - shell=True, - check=True) + subprocess.run( + full_cmd, + stdout=stdout_f, + stderr=stderr_f, + env=pg_bin._build_env(None), + shell=True, + check=True, + ) print(f"Done import") # Wait until pageserver persists the files - wait_for_upload(pageserver_http, - uuid.UUID(tenant_id), - uuid.UUID(timeline_id), - lsn_from_hex(last_lsn)) + wait_for_upload( + pageserver_http, uuid.UUID(tenant_id), uuid.UUID(timeline_id), lsn_from_hex(last_lsn) + ) -def export_timeline(args, - psql_path, - pageserver_connstr, - tenant_id, - timeline_id, - last_lsn, - prev_lsn, - tar_filename): +def export_timeline( + args, psql_path, pageserver_connstr, tenant_id, timeline_id, last_lsn, prev_lsn, tar_filename +): # Choose filenames incomplete_filename = tar_filename + ".incomplete" stderr_filename = path.join(args.work_dir, f"{tenant_id}_{timeline_id}.stderr") @@ -507,15 +516,13 @@ def export_timeline(args, # Run export command print(f"Running: {cmd}") - with open(incomplete_filename, 'w') as stdout_f: - with open(stderr_filename, 'w') as stderr_f: + with open(incomplete_filename, "w") as stdout_f: + with open(stderr_filename, "w") as stderr_f: print(f"(capturing output to {incomplete_filename})") pg_bin = PgBin(args.work_dir, args.pg_distrib_dir) - subprocess.run(cmd, - stdout=stdout_f, - stderr=stderr_f, - env=pg_bin._build_env(None), - check=True) + subprocess.run( + cmd, stdout=stdout_f, stderr=stderr_f, env=pg_bin._build_env(None), check=True + ) # Add missing rels pg_bin = PgBin(args.work_dir, args.pg_distrib_dir) @@ -551,27 +558,28 @@ def main(args: argparse.Namespace): for timeline in timelines: # Skip timelines we don't need to export - if args.timelines and timeline['timeline_id'] not in args.timelines: + if args.timelines and timeline["timeline_id"] not in args.timelines: print(f"Skipping timeline {timeline['timeline_id']}") continue # Choose filenames - tar_filename = path.join(args.work_dir, - f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar") + tar_filename = path.join( + args.work_dir, f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar" + ) # Export timeline from old pageserver if args.only_import is False: last_lsn, prev_lsn = get_rlsn( old_pageserver_connstr, - timeline['tenant_id'], - timeline['timeline_id'], + timeline["tenant_id"], + timeline["timeline_id"], ) export_timeline( args, psql_path, old_pageserver_connstr, - timeline['tenant_id'], - timeline['timeline_id'], + timeline["tenant_id"], + timeline["timeline_id"], last_lsn, prev_lsn, tar_filename, @@ -583,8 +591,8 @@ def main(args: argparse.Namespace): psql_path, new_pageserver_connstr, new_http_client, - timeline['tenant_id'], - timeline['timeline_id'], + timeline["tenant_id"], + timeline["timeline_id"], last_lsn, prev_lsn, tar_filename, @@ -592,117 +600,118 @@ def main(args: argparse.Namespace): # Re-export and compare re_export_filename = tar_filename + ".reexport" - export_timeline(args, - psql_path, - new_pageserver_connstr, - timeline['tenant_id'], - timeline['timeline_id'], - last_lsn, - prev_lsn, - re_export_filename) + export_timeline( + args, + psql_path, + new_pageserver_connstr, + timeline["tenant_id"], + timeline["timeline_id"], + last_lsn, + prev_lsn, + re_export_filename, + ) # Check the size is the same - old_size = os.path.getsize(tar_filename), - new_size = os.path.getsize(re_export_filename), + old_size = (os.path.getsize(tar_filename),) + new_size = (os.path.getsize(re_export_filename),) if old_size != new_size: raise AssertionError(f"Sizes don't match old: {old_size} new: {new_size}") -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - '--tenant-id', - dest='tenants', + "--tenant-id", + dest="tenants", required=True, - nargs='+', - help='Id of the tenant to migrate. You can pass multiple arguments', + nargs="+", + help="Id of the tenant to migrate. You can pass multiple arguments", ) parser.add_argument( - '--timeline-id', - dest='timelines', + "--timeline-id", + dest="timelines", required=False, - nargs='+', - help='Id of the timeline to migrate. You can pass multiple arguments', + nargs="+", + help="Id of the timeline to migrate. You can pass multiple arguments", ) parser.add_argument( - '--from-host', - dest='old_pageserver_host', + "--from-host", + dest="old_pageserver_host", required=True, - help='Host of the pageserver to migrate data from', + help="Host of the pageserver to migrate data from", ) parser.add_argument( - '--from-http-port', - dest='old_pageserver_http_port', + "--from-http-port", + dest="old_pageserver_http_port", required=False, type=int, default=9898, - help='HTTP port of the pageserver to migrate data from. Default: 9898', + help="HTTP port of the pageserver to migrate data from. Default: 9898", ) parser.add_argument( - '--from-pg-port', - dest='old_pageserver_pg_port', + "--from-pg-port", + dest="old_pageserver_pg_port", required=False, type=int, default=6400, - help='pg port of the pageserver to migrate data from. Default: 6400', + help="pg port of the pageserver to migrate data from. Default: 6400", ) parser.add_argument( - '--to-host', - dest='new_pageserver_host', + "--to-host", + dest="new_pageserver_host", required=True, - help='Host of the pageserver to migrate data to', + help="Host of the pageserver to migrate data to", ) parser.add_argument( - '--to-http-port', - dest='new_pageserver_http_port', + "--to-http-port", + dest="new_pageserver_http_port", required=False, default=9898, type=int, - help='HTTP port of the pageserver to migrate data to. Default: 9898', + help="HTTP port of the pageserver to migrate data to. Default: 9898", ) parser.add_argument( - '--to-pg-port', - dest='new_pageserver_pg_port', + "--to-pg-port", + dest="new_pageserver_pg_port", required=False, default=6400, type=int, - help='pg port of the pageserver to migrate data to. Default: 6400', + help="pg port of the pageserver to migrate data to. Default: 6400", ) parser.add_argument( - '--ignore-tenant-exists', - dest='ok_if_exists', + "--ignore-tenant-exists", + dest="ok_if_exists", required=False, - help= - 'Ignore error if we are trying to create the tenant that already exists. It can be dangerous if existing tenant already contains some data.', + help="Ignore error if we are trying to create the tenant that already exists. It can be dangerous if existing tenant already contains some data.", ) parser.add_argument( - '--pg-distrib-dir', - dest='pg_distrib_dir', + "--pg-distrib-dir", + dest="pg_distrib_dir", required=False, - default='/usr/local/', - help='Path where postgres binaries are installed. Default: /usr/local/', + default="/usr/local/", + help="Path where postgres binaries are installed. Default: /usr/local/", ) parser.add_argument( - '--psql-path', - dest='psql_path', + "--psql-path", + dest="psql_path", required=False, - default='/usr/local/bin/psql', - help='Path to the psql binary. Default: /usr/local/bin/psql', + default="/usr/local/bin/psql", + help="Path to the psql binary. Default: /usr/local/bin/psql", ) parser.add_argument( - '--only-import', - dest='only_import', + "--only-import", + dest="only_import", required=False, default=False, - action='store_true', - help='Skip export and tenant creation part', + action="store_true", + help="Skip export and tenant creation part", ) parser.add_argument( - '--work-dir', - dest='work_dir', + "--work-dir", + dest="work_dir", required=True, default=False, - help='directory where temporary tar files are stored', + help="directory where temporary tar files are stored", ) args = parser.parse_args() main(args) diff --git a/scripts/generate_perf_report_page.py b/scripts/generate_perf_report_page.py index 23fa4b76a3..b5b49bb600 100755 --- a/scripts/generate_perf_report_page.py +++ b/scripts/generate_perf_report_page.py @@ -1,31 +1,36 @@ #!/usr/bin/env python3 import argparse +import json from dataclasses import dataclass from pathlib import Path -import json from typing import Any, Dict, List, Optional, Tuple, cast + from jinja2 import Template # skip 'input' columns. They are included in the header and just blow the table -EXCLUDE_COLUMNS = frozenset({ - 'scale', - 'duration', - 'number_of_clients', - 'number_of_threads', - 'init_start_timestamp', - 'init_end_timestamp', - 'run_start_timestamp', - 'run_end_timestamp', -}) +EXCLUDE_COLUMNS = frozenset( + { + "scale", + "duration", + "number_of_clients", + "number_of_threads", + "init_start_timestamp", + "init_end_timestamp", + "run_start_timestamp", + "run_end_timestamp", + } +) -KEY_EXCLUDE_FIELDS = frozenset({ - 'init_start_timestamp', - 'init_end_timestamp', - 'run_start_timestamp', - 'run_end_timestamp', -}) -NEGATIVE_COLOR = 'negative' -POSITIVE_COLOR = 'positive' +KEY_EXCLUDE_FIELDS = frozenset( + { + "init_start_timestamp", + "init_end_timestamp", + "run_start_timestamp", + "run_end_timestamp", + } +) +NEGATIVE_COLOR = "negative" +POSITIVE_COLOR = "positive" EPS = 1e-6 @@ -55,75 +60,76 @@ def get_columns(values: List[Dict[Any, Any]]) -> Tuple[List[Tuple[str, str]], Li value_columns = [] common_columns = [] for item in values: - if item['name'] in KEY_EXCLUDE_FIELDS: + if item["name"] in KEY_EXCLUDE_FIELDS: continue - if item['report'] != 'test_param': - value_columns.append(cast(str, item['name'])) + if item["report"] != "test_param": + value_columns.append(cast(str, item["name"])) else: - common_columns.append((cast(str, item['name']), cast(str, item['value']))) + common_columns.append((cast(str, item["name"]), cast(str, item["value"]))) value_columns.sort() common_columns.sort(key=lambda x: x[0]) # sort by name return common_columns, value_columns def format_ratio(ratio: float, report: str) -> Tuple[str, str]: - color = '' - sign = '+' if ratio > 0 else '' + color = "" + sign = "+" if ratio > 0 else "" if abs(ratio) < 0.05: - return f' ({sign}{ratio:.2f})', color + return f" ({sign}{ratio:.2f})", color - if report not in {'test_param', 'higher_is_better', 'lower_is_better'}: - raise ValueError(f'Unknown report type: {report}') + if report not in {"test_param", "higher_is_better", "lower_is_better"}: + raise ValueError(f"Unknown report type: {report}") - if report == 'test_param': - return f'{ratio:.2f}', color + if report == "test_param": + return f"{ratio:.2f}", color if ratio > 0: - if report == 'higher_is_better': + if report == "higher_is_better": color = POSITIVE_COLOR - elif report == 'lower_is_better': + elif report == "lower_is_better": color = NEGATIVE_COLOR elif ratio < 0: - if report == 'higher_is_better': + if report == "higher_is_better": color = NEGATIVE_COLOR - elif report == 'lower_is_better': + elif report == "lower_is_better": color = POSITIVE_COLOR - return f' ({sign}{ratio:.2f})', color + return f" ({sign}{ratio:.2f})", color def extract_value(name: str, suit_run: SuitRun) -> Optional[Dict[str, Any]]: - for item in suit_run.values['data']: - if item['name'] == name: + for item in suit_run.values["data"]: + if item["name"] == name: return cast(Dict[str, Any], item) return None -def get_row_values(columns: List[str], run_result: SuitRun, - prev_result: Optional[SuitRun]) -> List[RowValue]: +def get_row_values( + columns: List[str], run_result: SuitRun, prev_result: Optional[SuitRun] +) -> List[RowValue]: row_values = [] for column in columns: current_value = extract_value(column, run_result) if current_value is None: # should never happen - raise ValueError(f'{column} not found in {run_result.values}') + raise ValueError(f"{column} not found in {run_result.values}") value = current_value["value"] if isinstance(value, float): - value = f'{value:.2f}' + value = f"{value:.2f}" if prev_result is None: - row_values.append(RowValue(value, '', '')) + row_values.append(RowValue(value, "", "")) continue prev_value = extract_value(column, prev_result) if prev_value is None: # this might happen when new metric is added and there is no value for it in previous run # let this be here, TODO add proper handling when this actually happens - raise ValueError(f'{column} not found in previous result') + raise ValueError(f"{column} not found in previous result") # adding `EPS` to each term to avoid ZeroDivisionError when the denominator is zero - ratio = (float(value) + EPS) / (float(prev_value['value']) + EPS) - 1 - ratio_display, color = format_ratio(ratio, current_value['report']) + ratio = (float(value) + EPS) / (float(prev_value["value"]) + EPS) - 1 + ratio_display, color = format_ratio(ratio, current_value["report"]) row_values.append(RowValue(value, color, ratio_display)) return row_values @@ -139,8 +145,10 @@ def prepare_rows_from_runs(value_columns: List[str], runs: List[SuitRun]) -> Lis prev_run = None for run in runs: rows.append( - SuiteRunTableRow(revision=run.revision, - values=get_row_values(value_columns, run, prev_run))) + SuiteRunTableRow( + revision=run.revision, values=get_row_values(value_columns, run, prev_run) + ) + ) prev_run = run return rows @@ -152,27 +160,29 @@ def main(args: argparse.Namespace) -> None: # we have files in form: _.json # fill them in the hashmap so we have grouped items for the # same run configuration (scale, duration etc.) ordered by counter. - for item in sorted(input_dir.iterdir(), key=lambda x: int(x.name.split('_')[0])): + for item in sorted(input_dir.iterdir(), key=lambda x: int(x.name.split("_")[0])): run_data = json.loads(item.read_text()) - revision = run_data['revision'] + revision = run_data["revision"] - for suit_result in run_data['result']: - key = "{}{}".format(run_data['platform'], suit_result['suit']) + for suit_result in run_data["result"]: + key = "{}{}".format(run_data["platform"], suit_result["suit"]) # pack total duration as a synthetic value - total_duration = suit_result['total_duration'] - suit_result['data'].append({ - 'name': 'total_duration', - 'value': total_duration, - 'unit': 's', - 'report': 'lower_is_better', - }) - common_columns, value_columns = get_columns(suit_result['data']) + total_duration = suit_result["total_duration"] + suit_result["data"].append( + { + "name": "total_duration", + "value": total_duration, + "unit": "s", + "report": "lower_is_better", + } + ) + common_columns, value_columns = get_columns(suit_result["data"]) grouped_runs.setdefault( key, SuitRuns( - platform=run_data['platform'], - suit=suit_result['suit'], + platform=run_data["platform"], + suit=suit_result["suit"], common_columns=common_columns, value_columns=value_columns, runs=[], @@ -184,26 +194,26 @@ def main(args: argparse.Namespace) -> None: for result in grouped_runs.values(): suit = result.suit context[suit] = { - 'common_columns': result.common_columns, - 'value_columns': result.value_columns, - 'platform': result.platform, + "common_columns": result.common_columns, + "value_columns": result.value_columns, + "platform": result.platform, # reverse the order so newest results are on top of the table - 'rows': reversed(prepare_rows_from_runs(result.value_columns, result.runs)), + "rows": reversed(prepare_rows_from_runs(result.value_columns, result.runs)), } - template = Template((Path(__file__).parent / 'perf_report_template.html').read_text()) + template = Template((Path(__file__).parent / "perf_report_template.html").read_text()) Path(args.out).write_text(template.render(context=context)) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - '--input-dir', - dest='input_dir', + "--input-dir", + dest="input_dir", required=True, - help='Directory with jsons generated by the test suite', + help="Directory with jsons generated by the test suite", ) - parser.add_argument('--out', required=True, help='Output html file path') + parser.add_argument("--out", required=True, help="Output html file path") args = parser.parse_args() main(args) diff --git a/scripts/git-upload b/scripts/git-upload index a53987894a..d56c0f8e94 100755 --- a/scripts/git-upload +++ b/scripts/git-upload @@ -1,17 +1,16 @@ #!/usr/bin/env python3 -from contextlib import contextmanager -import shlex -from tempfile import TemporaryDirectory -from distutils.dir_util import copy_tree -from pathlib import Path - import argparse import os +import shlex import shutil import subprocess import sys import textwrap +from contextlib import contextmanager +from distutils.dir_util import copy_tree +from pathlib import Path +from tempfile import TemporaryDirectory from typing import Optional diff --git a/scripts/ingest_perf_test_result.py b/scripts/ingest_perf_test_result.py index 89463c986a..71f7ad3262 100644 --- a/scripts/ingest_perf_test_result.py +++ b/scripts/ingest_perf_test_result.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 import argparse -from contextlib import contextmanager import json import os +from contextlib import contextmanager +from datetime import datetime +from pathlib import Path + import psycopg2 import psycopg2.extras -from pathlib import Path -from datetime import datetime CREATE_TABLE = """ CREATE TABLE IF NOT EXISTS perf_test_results ( @@ -24,15 +25,15 @@ CREATE TABLE IF NOT EXISTS perf_test_results ( def err(msg): - print(f'error: {msg}') + print(f"error: {msg}") exit(1) @contextmanager def get_connection_cursor(): - connstr = os.getenv('DATABASE_URL') + connstr = os.getenv("DATABASE_URL") if not connstr: - err('DATABASE_URL environment variable is not set') + err("DATABASE_URL environment variable is not set") with psycopg2.connect(connstr) as conn: with conn.cursor() as cur: yield cur @@ -44,33 +45,35 @@ def create_table(cur): def ingest_perf_test_result(cursor, data_dile: Path, recorded_at_timestamp: int) -> int: run_data = json.loads(data_dile.read_text()) - revision = run_data['revision'] - platform = run_data['platform'] + revision = run_data["revision"] + platform = run_data["platform"] - run_result = run_data['result'] + run_result = run_data["result"] args_list = [] for suit_result in run_result: - suit = suit_result['suit'] - total_duration = suit_result['total_duration'] + suit = suit_result["suit"] + total_duration = suit_result["total_duration"] - suit_result['data'].append({ - 'name': 'total_duration', - 'value': total_duration, - 'unit': 's', - 'report': 'lower_is_better', - }) + suit_result["data"].append( + { + "name": "total_duration", + "value": total_duration, + "unit": "s", + "report": "lower_is_better", + } + ) - for metric in suit_result['data']: + for metric in suit_result["data"]: values = { - 'suit': suit, - 'revision': revision, - 'platform': platform, - 'metric_name': metric['name'], - 'metric_value': metric['value'], - 'metric_unit': metric['unit'], - 'metric_report_type': metric['report'], - 'recorded_at_timestamp': datetime.utcfromtimestamp(recorded_at_timestamp), + "suit": suit, + "revision": revision, + "platform": platform, + "metric_name": metric["name"], + "metric_value": metric["value"], + "metric_unit": metric["unit"], + "metric_report_type": metric["report"], + "recorded_at_timestamp": datetime.utcfromtimestamp(recorded_at_timestamp), } args_list.append(values) @@ -104,13 +107,16 @@ def ingest_perf_test_result(cursor, data_dile: Path, recorded_at_timestamp: int) def main(): - parser = argparse.ArgumentParser(description='Perf test result uploader. \ - Database connection string should be provided via DATABASE_URL environment variable', ) + parser = argparse.ArgumentParser( + description="Perf test result uploader. \ + Database connection string should be provided via DATABASE_URL environment variable", + ) parser.add_argument( - '--ingest', + "--ingest", type=Path, - help='Path to perf test result file, or directory with perf test result files') - parser.add_argument('--initdb', action='store_true', help='Initialuze database') + help="Path to perf test result file, or directory with perf test result files", + ) + parser.add_argument("--initdb", action="store_true", help="Initialuze database") args = parser.parse_args() with get_connection_cursor() as cur: @@ -118,19 +124,19 @@ def main(): create_table(cur) if not args.ingest.exists(): - err(f'ingest path {args.ingest} does not exist') + err(f"ingest path {args.ingest} does not exist") if args.ingest: if args.ingest.is_dir(): - for item in sorted(args.ingest.iterdir(), key=lambda x: int(x.name.split('_')[0])): - recorded_at_timestamp = int(item.name.split('_')[0]) + for item in sorted(args.ingest.iterdir(), key=lambda x: int(x.name.split("_")[0])): + recorded_at_timestamp = int(item.name.split("_")[0]) ingested = ingest_perf_test_result(cur, item, recorded_at_timestamp) - print(f'Ingested {ingested} metric values from {item}') + print(f"Ingested {ingested} metric values from {item}") else: - recorded_at_timestamp = int(args.ingest.name.split('_')[0]) + recorded_at_timestamp = int(args.ingest.name.split("_")[0]) ingested = ingest_perf_test_result(cur, args.ingest, recorded_at_timestamp) - print(f'Ingested {ingested} metric values from {args.ingest}') + print(f"Ingested {ingested} metric values from {args.ingest}") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/test_runner/batch_others/test_ancestor_branch.py b/test_runner/batch_others/test_ancestor_branch.py index c4d36da043..96612a8aef 100644 --- a/test_runner/batch_others/test_ancestor_branch.py +++ b/test_runner/batch_others/test_ancestor_branch.py @@ -13,83 +13,90 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder): # Extend compaction_period and gc_period to disable background compaction and gc. tenant, _ = env.neon_cli.create_tenant( conf={ - 'gc_period': '10 m', - 'gc_horizon': '1048576', - 'checkpoint_distance': '4194304', - 'compaction_period': '10 m', - 'compaction_threshold': '2', - 'compaction_target_size': '4194304', - }) + "gc_period": "10 m", + "gc_horizon": "1048576", + "checkpoint_distance": "4194304", + "compaction_period": "10 m", + "compaction_threshold": "2", + "compaction_target_size": "4194304", + } + ) env.pageserver.safe_psql("failpoints flush-frozen-before-sync=sleep(10000)") - pg_branch0 = env.postgres.create_start('main', tenant_id=tenant) + pg_branch0 = env.postgres.create_start("main", tenant_id=tenant) branch0_cur = pg_branch0.connect().cursor() branch0_timeline = query_scalar(branch0_cur, "SHOW neon.timeline_id") log.info(f"b0 timeline {branch0_timeline}") # Create table, and insert 100k rows. - branch0_lsn = query_scalar(branch0_cur, 'SELECT pg_current_wal_insert_lsn()') + branch0_lsn = query_scalar(branch0_cur, "SELECT pg_current_wal_insert_lsn()") log.info(f"b0 at lsn {branch0_lsn}") - branch0_cur.execute('CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)') - branch0_cur.execute(''' + branch0_cur.execute("CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)") + branch0_cur.execute( + """ INSERT INTO foo SELECT '00112233445566778899AABBCCDDEEFF' || ':branch0:' || g FROM generate_series(1, 100000) g - ''') - lsn_100 = query_scalar(branch0_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN after 100k rows: {lsn_100}') + """ + ) + lsn_100 = query_scalar(branch0_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN after 100k rows: {lsn_100}") # Create branch1. - env.neon_cli.create_branch('branch1', 'main', tenant_id=tenant, ancestor_start_lsn=lsn_100) - pg_branch1 = env.postgres.create_start('branch1', tenant_id=tenant) + env.neon_cli.create_branch("branch1", "main", tenant_id=tenant, ancestor_start_lsn=lsn_100) + pg_branch1 = env.postgres.create_start("branch1", tenant_id=tenant) log.info("postgres is running on 'branch1' branch") branch1_cur = pg_branch1.connect().cursor() branch1_timeline = query_scalar(branch1_cur, "SHOW neon.timeline_id") log.info(f"b1 timeline {branch1_timeline}") - branch1_lsn = query_scalar(branch1_cur, 'SELECT pg_current_wal_insert_lsn()') + branch1_lsn = query_scalar(branch1_cur, "SELECT pg_current_wal_insert_lsn()") log.info(f"b1 at lsn {branch1_lsn}") # Insert 100k rows. - branch1_cur.execute(''' + branch1_cur.execute( + """ INSERT INTO foo SELECT '00112233445566778899AABBCCDDEEFF' || ':branch1:' || g FROM generate_series(1, 100000) g - ''') - lsn_200 = query_scalar(branch1_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN after 200k rows: {lsn_200}') + """ + ) + lsn_200 = query_scalar(branch1_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN after 200k rows: {lsn_200}") # Create branch2. - env.neon_cli.create_branch('branch2', 'branch1', tenant_id=tenant, ancestor_start_lsn=lsn_200) - pg_branch2 = env.postgres.create_start('branch2', tenant_id=tenant) + env.neon_cli.create_branch("branch2", "branch1", tenant_id=tenant, ancestor_start_lsn=lsn_200) + pg_branch2 = env.postgres.create_start("branch2", tenant_id=tenant) log.info("postgres is running on 'branch2' branch") branch2_cur = pg_branch2.connect().cursor() branch2_timeline = query_scalar(branch2_cur, "SHOW neon.timeline_id") log.info(f"b2 timeline {branch2_timeline}") - branch2_lsn = query_scalar(branch2_cur, 'SELECT pg_current_wal_insert_lsn()') + branch2_lsn = query_scalar(branch2_cur, "SELECT pg_current_wal_insert_lsn()") log.info(f"b2 at lsn {branch2_lsn}") # Insert 100k rows. - branch2_cur.execute(''' + branch2_cur.execute( + """ INSERT INTO foo SELECT '00112233445566778899AABBCCDDEEFF' || ':branch2:' || g FROM generate_series(1, 100000) g - ''') - lsn_300 = query_scalar(branch2_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN after 300k rows: {lsn_300}') + """ + ) + lsn_300 = query_scalar(branch2_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN after 300k rows: {lsn_300}") # Run compaction on branch1. - compact = f'compact {tenant.hex} {branch1_timeline} {lsn_200}' + compact = f"compact {tenant.hex} {branch1_timeline} {lsn_200}" log.info(compact) env.pageserver.safe_psql(compact) - assert query_scalar(branch0_cur, 'SELECT count(*) FROM foo') == 100000 + assert query_scalar(branch0_cur, "SELECT count(*) FROM foo") == 100000 - assert query_scalar(branch1_cur, 'SELECT count(*) FROM foo') == 200000 + assert query_scalar(branch1_cur, "SELECT count(*) FROM foo") == 200000 - assert query_scalar(branch2_cur, 'SELECT count(*) FROM foo') == 300000 + assert query_scalar(branch2_cur, "SELECT count(*) FROM foo") == 300000 diff --git a/test_runner/batch_others/test_auth.py b/test_runner/batch_others/test_auth.py index 0fd0a5d7e3..16d6ae45c3 100644 --- a/test_runner/batch_others/test_auth.py +++ b/test_runner/batch_others/test_auth.py @@ -1,7 +1,8 @@ from contextlib import closing from uuid import uuid4 -from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException + import pytest +from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException def test_pageserver_auth(neon_env_builder: NeonEnvBuilder): @@ -23,41 +24,46 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder): ps.safe_psql("set FOO", password=tenant_token) ps.safe_psql("set FOO", password=management_token) - new_timeline_id = env.neon_cli.create_branch('test_pageserver_auth', - tenant_id=env.initial_tenant) + new_timeline_id = env.neon_cli.create_branch( + "test_pageserver_auth", tenant_id=env.initial_tenant + ) # tenant can create branches - tenant_http_client.timeline_create(tenant_id=env.initial_tenant, - ancestor_timeline_id=new_timeline_id) + tenant_http_client.timeline_create( + tenant_id=env.initial_tenant, ancestor_timeline_id=new_timeline_id + ) # console can create branches for tenant - management_http_client.timeline_create(tenant_id=env.initial_tenant, - ancestor_timeline_id=new_timeline_id) + management_http_client.timeline_create( + tenant_id=env.initial_tenant, ancestor_timeline_id=new_timeline_id + ) # fail to create branch using token with different tenant_id - with pytest.raises(NeonPageserverApiException, - match='Forbidden: Tenant id mismatch. Permission denied'): - invalid_tenant_http_client.timeline_create(tenant_id=env.initial_tenant, - ancestor_timeline_id=new_timeline_id) + with pytest.raises( + NeonPageserverApiException, match="Forbidden: Tenant id mismatch. Permission denied" + ): + invalid_tenant_http_client.timeline_create( + tenant_id=env.initial_tenant, ancestor_timeline_id=new_timeline_id + ) # create tenant using management token management_http_client.tenant_create() # fail to create tenant using tenant token with pytest.raises( - NeonPageserverApiException, - match='Forbidden: Attempt to access management api with tenant scope. Permission denied' + NeonPageserverApiException, + match="Forbidden: Attempt to access management api with tenant scope. Permission denied", ): tenant_http_client.tenant_create() -@pytest.mark.parametrize('with_safekeepers', [False, True]) +@pytest.mark.parametrize("with_safekeepers", [False, True]) def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool): neon_env_builder.auth_enabled = True if with_safekeepers: neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - branch = f'test_compute_auth_to_pageserver{with_safekeepers}' + branch = f"test_compute_auth_to_pageserver{with_safekeepers}" env.neon_cli.create_branch(branch) pg = env.postgres.create_start(branch) @@ -65,7 +71,7 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder, with_safek with conn.cursor() as cur: # we rely upon autocommit after each statement # as waiting for acceptors happens there - cur.execute('CREATE TABLE t(key int primary key, value text)') + cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") - cur.execute('SELECT sum(key) FROM t') - assert cur.fetchone() == (5000050000, ) + cur.execute("SELECT sum(key) FROM t") + assert cur.fetchone() == (5000050000,) diff --git a/test_runner/batch_others/test_backpressure.py b/test_runner/batch_others/test_backpressure.py index 4ca03b102b..a81fa380a9 100644 --- a/test_runner/batch_others/test_backpressure.py +++ b/test_runner/batch_others/test_backpressure.py @@ -1,13 +1,13 @@ +import threading +import time from contextlib import closing, contextmanager + import psycopg2.extras import pytest -from fixtures.neon_fixtures import NeonEnvBuilder from fixtures.log_helper import log -import time -from fixtures.neon_fixtures import Postgres -import threading +from fixtures.neon_fixtures import NeonEnvBuilder, Postgres -pytest_plugins = ("fixtures.neon_fixtures") +pytest_plugins = "fixtures.neon_fixtures" @contextmanager @@ -44,7 +44,8 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv with pg_cur(pg) as cur: while not stop_event.is_set(): try: - cur.execute(''' + cur.execute( + """ select pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn) as received_lsn_lag, pg_wal_lsn_diff(pg_current_wal_flush_lsn(),disk_consistent_lsn) as disk_consistent_lsn_lag, pg_wal_lsn_diff(pg_current_wal_flush_lsn(),remote_consistent_lsn) as remote_consistent_lsn_lag, @@ -52,16 +53,19 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(),disk_consistent_lsn)), pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(),remote_consistent_lsn)) from backpressure_lsns(); - ''') + """ + ) res = cur.fetchone() received_lsn_lag = res[0] disk_consistent_lsn_lag = res[1] remote_consistent_lsn_lag = res[2] - log.info(f"received_lsn_lag = {received_lsn_lag} ({res[3]}), " - f"disk_consistent_lsn_lag = {disk_consistent_lsn_lag} ({res[4]}), " - f"remote_consistent_lsn_lag = {remote_consistent_lsn_lag} ({res[5]})") + log.info( + f"received_lsn_lag = {received_lsn_lag} ({res[3]}), " + f"disk_consistent_lsn_lag = {disk_consistent_lsn_lag} ({res[4]}), " + f"remote_consistent_lsn_lag = {remote_consistent_lsn_lag} ({res[5]})" + ) # Since feedback from pageserver is not immediate, we should allow some lag overflow lag_overflow = 5 * 1024 * 1024 # 5MB @@ -71,7 +75,9 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv if max_replication_flush_lag_bytes > 0: assert disk_consistent_lsn_lag < max_replication_flush_lag_bytes + lag_overflow if max_replication_apply_lag_bytes > 0: - assert remote_consistent_lsn_lag < max_replication_apply_lag_bytes + lag_overflow + assert ( + remote_consistent_lsn_lag < max_replication_apply_lag_bytes + lag_overflow + ) time.sleep(polling_interval) @@ -79,7 +85,7 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv log.info(f"backpressure check query failed: {e}") stop_event.set() - log.info('check thread stopped') + log.info("check thread stopped") # This test illustrates how to tune backpressure to control the lag @@ -94,10 +100,11 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() # Create a branch for us - env.neon_cli.create_branch('test_backpressure') + env.neon_cli.create_branch("test_backpressure") - pg = env.postgres.create_start('test_backpressure', - config_lines=['max_replication_write_lag=30MB']) + pg = env.postgres.create_start( + "test_backpressure", config_lines=["max_replication_write_lag=30MB"] + ) log.info("postgres is running on 'test_backpressure' branch") # setup check thread @@ -131,23 +138,29 @@ def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder): rows_inserted += 100000 except Exception as e: if check_thread.is_alive(): - log.info('stopping check thread') + log.info("stopping check thread") check_stop_event.set() check_thread.join() - assert False, f"Exception {e} while inserting rows, but WAL lag is within configured threshold. That means backpressure is not tuned properly" + assert ( + False + ), f"Exception {e} while inserting rows, but WAL lag is within configured threshold. That means backpressure is not tuned properly" else: - assert False, f"Exception {e} while inserting rows and WAL lag overflowed configured threshold. That means backpressure doesn't work." + assert ( + False + ), f"Exception {e} while inserting rows and WAL lag overflowed configured threshold. That means backpressure doesn't work." log.info(f"inserted {rows_inserted} rows") if check_thread.is_alive(): - log.info('stopping check thread') + log.info("stopping check thread") check_stop_event.set() check_thread.join() - log.info('check thread stopped') + log.info("check thread stopped") else: - assert False, "WAL lag overflowed configured threshold. That means backpressure doesn't work." + assert ( + False + ), "WAL lag overflowed configured threshold. That means backpressure doesn't work." -#TODO test_backpressure_disk_consistent_lsn_lag. Play with pageserver's checkpoint settings -#TODO test_backpressure_remote_consistent_lsn_lag +# TODO test_backpressure_disk_consistent_lsn_lag. Play with pageserver's checkpoint settings +# TODO test_backpressure_remote_consistent_lsn_lag diff --git a/test_runner/batch_others/test_basebackup_error.py b/test_runner/batch_others/test_basebackup_error.py index 0909ed98a7..9960f3afbf 100644 --- a/test_runner/batch_others/test_basebackup_error.py +++ b/test_runner/batch_others/test_basebackup_error.py @@ -1,5 +1,4 @@ import pytest - from fixtures.neon_fixtures import NeonEnv @@ -15,4 +14,4 @@ def test_basebackup_error(neon_simple_env: NeonEnv): env.pageserver.safe_psql(f"failpoints basebackup-before-control-file=return") with pytest.raises(Exception, match="basebackup-before-control-file"): - pg = env.postgres.create_start('test_basebackup_error') + pg = env.postgres.create_start("test_basebackup_error") diff --git a/test_runner/batch_others/test_branch_and_gc.py b/test_runner/batch_others/test_branch_and_gc.py index 8e433f65ad..bc8374543f 100644 --- a/test_runner/batch_others/test_branch_and_gc.py +++ b/test_runner/batch_others/test_branch_and_gc.py @@ -1,6 +1,7 @@ import threading -import pytest import time + +import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv from fixtures.utils import lsn_from_hex, query_scalar @@ -49,55 +50,52 @@ def test_branch_and_gc(neon_simple_env: NeonEnv): tenant, _ = env.neon_cli.create_tenant( conf={ # disable background GC - 'gc_period': '10 m', - 'gc_horizon': f'{10 * 1024 ** 3}', - + "gc_period": "10 m", + "gc_horizon": f"{10 * 1024 ** 3}", # small checkpoint distance to create more delta layer files - 'checkpoint_distance': f'{1024 ** 2}', - + "checkpoint_distance": f"{1024 ** 2}", # set the target size to be large to allow the image layer to cover the whole key space - 'compaction_target_size': f'{1024 ** 3}', - + "compaction_target_size": f"{1024 ** 3}", # tweak the default settings to allow quickly create image layers and L1 layers - 'compaction_period': '1 s', - 'compaction_threshold': '2', - 'image_creation_threshold': '1', - + "compaction_period": "1 s", + "compaction_threshold": "2", + "image_creation_threshold": "1", # set PITR interval to be small, so we can do GC - 'pitr_interval': '1 s' - }) + "pitr_interval": "1 s", + } + ) - timeline_main = env.neon_cli.create_timeline(f'test_main', tenant_id=tenant) - pg_main = env.postgres.create_start('test_main', tenant_id=tenant) + timeline_main = env.neon_cli.create_timeline(f"test_main", tenant_id=tenant) + pg_main = env.postgres.create_start("test_main", tenant_id=tenant) main_cur = pg_main.connect().cursor() main_cur.execute( "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')" ) - main_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)') - lsn1 = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN1: {lsn1}') + main_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)") + lsn1 = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN1: {lsn1}") - main_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)') - lsn2 = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN2: {lsn2}') + main_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)") + lsn2 = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN2: {lsn2}") # Set the GC horizon so that lsn1 is inside the horizon, which means # we can create a new branch starting from lsn1. env.pageserver.safe_psql( - f'do_gc {tenant.hex} {timeline_main.hex} {lsn_from_hex(lsn2) - lsn_from_hex(lsn1) + 1024}') + f"do_gc {tenant.hex} {timeline_main.hex} {lsn_from_hex(lsn2) - lsn_from_hex(lsn1) + 1024}" + ) - env.neon_cli.create_branch('test_branch', - 'test_main', - tenant_id=tenant, - ancestor_start_lsn=lsn1) - pg_branch = env.postgres.create_start('test_branch', tenant_id=tenant) + env.neon_cli.create_branch( + "test_branch", "test_main", tenant_id=tenant, ancestor_start_lsn=lsn1 + ) + pg_branch = env.postgres.create_start("test_branch", tenant_id=tenant) branch_cur = pg_branch.connect().cursor() - branch_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)') + branch_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)") - assert query_scalar(branch_cur, 'SELECT count(*) FROM foo') == 200000 + assert query_scalar(branch_cur, "SELECT count(*) FROM foo") == 200000 # This test simulates a race condition happening when branch creation and GC are performed concurrently. @@ -120,32 +118,31 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv): tenant, _ = env.neon_cli.create_tenant( conf={ # disable background GC - 'gc_period': '10 m', - 'gc_horizon': f'{10 * 1024 ** 3}', - + "gc_period": "10 m", + "gc_horizon": f"{10 * 1024 ** 3}", # small checkpoint distance to create more delta layer files - 'checkpoint_distance': f'{1024 ** 2}', - + "checkpoint_distance": f"{1024 ** 2}", # set the target size to be large to allow the image layer to cover the whole key space - 'compaction_target_size': f'{1024 ** 3}', - + "compaction_target_size": f"{1024 ** 3}", # tweak the default settings to allow quickly create image layers and L1 layers - 'compaction_period': '1 s', - 'compaction_threshold': '2', - 'image_creation_threshold': '1', - + "compaction_period": "1 s", + "compaction_threshold": "2", + "image_creation_threshold": "1", # set PITR interval to be small, so we can do GC - 'pitr_interval': '0 s' - }) + "pitr_interval": "0 s", + } + ) - b0 = env.neon_cli.create_branch('b0', tenant_id=tenant) - pg0 = env.postgres.create_start('b0', tenant_id=tenant) - res = pg0.safe_psql_many(queries=[ - "CREATE TABLE t(key serial primary key)", - "INSERT INTO t SELECT FROM generate_series(1, 100000)", - "SELECT pg_current_wal_insert_lsn()", - "INSERT INTO t SELECT FROM generate_series(1, 100000)", - ]) + b0 = env.neon_cli.create_branch("b0", tenant_id=tenant) + pg0 = env.postgres.create_start("b0", tenant_id=tenant) + res = pg0.safe_psql_many( + queries=[ + "CREATE TABLE t(key serial primary key)", + "INSERT INTO t SELECT FROM generate_series(1, 100000)", + "SELECT pg_current_wal_insert_lsn()", + "INSERT INTO t SELECT FROM generate_series(1, 100000)", + ] + ) lsn = res[2][0][0] # Use `failpoint=sleep` and `threading` to make the GC iteration triggers *before* the @@ -166,6 +163,6 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv): # The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC. with pytest.raises(Exception, match="invalid branch start lsn"): - env.neon_cli.create_branch('b1', 'b0', tenant_id=tenant, ancestor_start_lsn=lsn) + env.neon_cli.create_branch("b1", "b0", tenant_id=tenant, ancestor_start_lsn=lsn) thread.join() diff --git a/test_runner/batch_others/test_branch_behind.py b/test_runner/batch_others/test_branch_behind.py index 95f478dda8..51946380d2 100644 --- a/test_runner/batch_others/test_branch_behind.py +++ b/test_runner/batch_others/test_branch_behind.py @@ -1,8 +1,8 @@ import psycopg2.extras import pytest from fixtures.log_helper import log -from fixtures.utils import print_gc_result, query_scalar from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.utils import print_gc_result, query_scalar # @@ -21,8 +21,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() # Branch at the point where only 100 rows were inserted - env.neon_cli.create_branch('test_branch_behind') - pgmain = env.postgres.create_start('test_branch_behind') + env.neon_cli.create_branch("test_branch_behind") + pgmain = env.postgres.create_start("test_branch_behind") log.info("postgres is running on 'test_branch_behind' branch") main_cur = pgmain.connect().cursor() @@ -30,80 +30,86 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): timeline = query_scalar(main_cur, "SHOW neon.timeline_id") # Create table, and insert the first 100 rows - main_cur.execute('CREATE TABLE foo (t text)') + main_cur.execute("CREATE TABLE foo (t text)") # keep some early lsn to test branch creation on out of date lsn - gced_lsn = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') + gced_lsn = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100) g - ''') - lsn_a = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN after 100 rows: {lsn_a}') + """ + ) + lsn_a = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN after 100 rows: {lsn_a}") # Insert some more rows. (This generates enough WAL to fill a few segments.) - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 200000) g - ''') - lsn_b = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN after 200100 rows: {lsn_b}') + """ + ) + lsn_b = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN after 200100 rows: {lsn_b}") # Branch at the point where only 100 rows were inserted - env.neon_cli.create_branch('test_branch_behind_hundred', - 'test_branch_behind', - ancestor_start_lsn=lsn_a) + env.neon_cli.create_branch( + "test_branch_behind_hundred", "test_branch_behind", ancestor_start_lsn=lsn_a + ) # Insert many more rows. This generates enough WAL to fill a few segments. - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 200000) g - ''') - lsn_c = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') + """ + ) + lsn_c = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") - log.info(f'LSN after 400100 rows: {lsn_c}') + log.info(f"LSN after 400100 rows: {lsn_c}") # Branch at the point where only 200100 rows were inserted - env.neon_cli.create_branch('test_branch_behind_more', - 'test_branch_behind', - ancestor_start_lsn=lsn_b) + env.neon_cli.create_branch( + "test_branch_behind_more", "test_branch_behind", ancestor_start_lsn=lsn_b + ) - pg_hundred = env.postgres.create_start('test_branch_behind_hundred') - pg_more = env.postgres.create_start('test_branch_behind_more') + pg_hundred = env.postgres.create_start("test_branch_behind_hundred") + pg_more = env.postgres.create_start("test_branch_behind_more") # On the 'hundred' branch, we should see only 100 rows hundred_cur = pg_hundred.connect().cursor() - assert query_scalar(hundred_cur, 'SELECT count(*) FROM foo') == 100 + assert query_scalar(hundred_cur, "SELECT count(*) FROM foo") == 100 # On the 'more' branch, we should see 100200 rows more_cur = pg_more.connect().cursor() - assert query_scalar(more_cur, 'SELECT count(*) FROM foo') == 200100 + assert query_scalar(more_cur, "SELECT count(*) FROM foo") == 200100 # All the rows are visible on the main branch - assert query_scalar(main_cur, 'SELECT count(*) FROM foo') == 400100 + assert query_scalar(main_cur, "SELECT count(*) FROM foo") == 400100 # Check bad lsn's for branching # branch at segment boundary - env.neon_cli.create_branch('test_branch_segment_boundary', - 'test_branch_behind', - ancestor_start_lsn="0/3000000") - pg = env.postgres.create_start('test_branch_segment_boundary') - assert pg.safe_psql('SELECT 1')[0][0] == 1 + env.neon_cli.create_branch( + "test_branch_segment_boundary", "test_branch_behind", ancestor_start_lsn="0/3000000" + ) + pg = env.postgres.create_start("test_branch_segment_boundary") + assert pg.safe_psql("SELECT 1")[0][0] == 1 # branch at pre-initdb lsn with pytest.raises(Exception, match="invalid branch start lsn"): - env.neon_cli.create_branch('test_branch_preinitdb', ancestor_start_lsn="0/42") + env.neon_cli.create_branch("test_branch_preinitdb", ancestor_start_lsn="0/42") # branch at pre-ancestor lsn with pytest.raises(Exception, match="less than timeline ancestor lsn"): - env.neon_cli.create_branch('test_branch_preinitdb', - 'test_branch_behind', - ancestor_start_lsn="0/42") + env.neon_cli.create_branch( + "test_branch_preinitdb", "test_branch_behind", ancestor_start_lsn="0/42" + ) # check that we cannot create branch based on garbage collected data with env.pageserver.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur: @@ -114,13 +120,13 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): with pytest.raises(Exception, match="invalid branch start lsn"): # this gced_lsn is pretty random, so if gc is disabled this woudln't fail - env.neon_cli.create_branch('test_branch_create_fail', - 'test_branch_behind', - ancestor_start_lsn=gced_lsn) + env.neon_cli.create_branch( + "test_branch_create_fail", "test_branch_behind", ancestor_start_lsn=gced_lsn + ) # check that after gc everything is still there - assert query_scalar(hundred_cur, 'SELECT count(*) FROM foo') == 100 + assert query_scalar(hundred_cur, "SELECT count(*) FROM foo") == 100 - assert query_scalar(more_cur, 'SELECT count(*) FROM foo') == 200100 + assert query_scalar(more_cur, "SELECT count(*) FROM foo") == 200100 - assert query_scalar(main_cur, 'SELECT count(*) FROM foo') == 400100 + assert query_scalar(main_cur, "SELECT count(*) FROM foo") == 400100 diff --git a/test_runner/batch_others/test_branching.py b/test_runner/batch_others/test_branching.py index c61bac7a58..2d08b07f82 100644 --- a/test_runner/batch_others/test_branching.py +++ b/test_runner/batch_others/test_branching.py @@ -1,10 +1,11 @@ -from typing import List -import threading -import pytest -from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres -import time import random +import threading +import time +from typing import List + +import pytest from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres from performance.test_perf_pgbench import get_scales_matrix @@ -20,38 +21,37 @@ from performance.test_perf_pgbench import get_scales_matrix @pytest.mark.parametrize("n_branches", [10]) @pytest.mark.parametrize("scale", get_scales_matrix(1)) @pytest.mark.parametrize("ty", ["cascade", "flat"]) -def test_branching_with_pgbench(neon_simple_env: NeonEnv, - pg_bin: PgBin, - n_branches: int, - scale: int, - ty: str): +def test_branching_with_pgbench( + neon_simple_env: NeonEnv, pg_bin: PgBin, n_branches: int, scale: int, ty: str +): env = neon_simple_env # Use aggressive GC and checkpoint settings, so that we also exercise GC during the test tenant, _ = env.neon_cli.create_tenant( - conf={ - 'gc_period': '5 s', - 'gc_horizon': f'{1024 ** 2}', - 'checkpoint_distance': f'{1024 ** 2}', - 'compaction_target_size': f'{1024 ** 2}', - # set PITR interval to be small, so we can do GC - 'pitr_interval': '5 s' - }) + conf={ + "gc_period": "5 s", + "gc_horizon": f"{1024 ** 2}", + "checkpoint_distance": f"{1024 ** 2}", + "compaction_target_size": f"{1024 ** 2}", + # set PITR interval to be small, so we can do GC + "pitr_interval": "5 s", + } + ) def run_pgbench(pg: Postgres): connstr = pg.connstr() log.info(f"Start a pgbench workload on pg {connstr}") - pg_bin.run_capture(['pgbench', '-i', f'-s{scale}', connstr]) - pg_bin.run_capture(['pgbench', '-T15', connstr]) + pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr]) + pg_bin.run_capture(["pgbench", "-T15", connstr]) - env.neon_cli.create_branch('b0', tenant_id=tenant) + env.neon_cli.create_branch("b0", tenant_id=tenant) pgs: List[Postgres] = [] - pgs.append(env.postgres.create_start('b0', tenant_id=tenant)) + pgs.append(env.postgres.create_start("b0", tenant_id=tenant)) threads: List[threading.Thread] = [] - threads.append(threading.Thread(target=run_pgbench, args=(pgs[0], ), daemon=True)) + threads.append(threading.Thread(target=run_pgbench, args=(pgs[0],), daemon=True)) threads[-1].start() thread_limit = 4 @@ -72,18 +72,18 @@ def test_branching_with_pgbench(neon_simple_env: NeonEnv, threads = [] if ty == "cascade": - env.neon_cli.create_branch('b{}'.format(i + 1), 'b{}'.format(i), tenant_id=tenant) + env.neon_cli.create_branch("b{}".format(i + 1), "b{}".format(i), tenant_id=tenant) else: - env.neon_cli.create_branch('b{}'.format(i + 1), 'b0', tenant_id=tenant) + env.neon_cli.create_branch("b{}".format(i + 1), "b0", tenant_id=tenant) - pgs.append(env.postgres.create_start('b{}'.format(i + 1), tenant_id=tenant)) + pgs.append(env.postgres.create_start("b{}".format(i + 1), tenant_id=tenant)) - threads.append(threading.Thread(target=run_pgbench, args=(pgs[-1], ), daemon=True)) + threads.append(threading.Thread(target=run_pgbench, args=(pgs[-1],), daemon=True)) threads[-1].start() for thread in threads: thread.join() for pg in pgs: - res = pg.safe_psql('SELECT count(*) from pgbench_accounts') - assert res[0] == (100000 * scale, ) + res = pg.safe_psql("SELECT count(*) from pgbench_accounts") + assert res[0] == (100000 * scale,) diff --git a/test_runner/batch_others/test_broken_timeline.py b/test_runner/batch_others/test_broken_timeline.py index b9e5f637ab..b96a7895eb 100644 --- a/test_runner/batch_others/test_broken_timeline.py +++ b/test_runner/batch_others/test_broken_timeline.py @@ -1,12 +1,12 @@ +import concurrent.futures +import os +from contextlib import closing from typing import List, Tuple from uuid import UUID -import pytest -import concurrent.futures -from contextlib import closing -from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres -from fixtures.log_helper import log -import os +import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres from fixtures.utils import query_scalar @@ -24,7 +24,7 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder): tenant_id = tenant_id_uuid.hex timeline_id = timeline_id_uuid.hex - pg = env.postgres.create_start(f'main', tenant_id=tenant_id_uuid) + pg = env.postgres.create_start(f"main", tenant_id=tenant_id_uuid) with pg.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100), 'payload'") @@ -42,7 +42,7 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder): # Corrupt metadata file on timeline 1 (tenant1, timeline1, pg1) = tenant_timelines[1] metadata_path = "{}/tenants/{}/timelines/{}/metadata".format(env.repo_dir, tenant1, timeline1) - print(f'overwriting metadata file at {metadata_path}') + print(f"overwriting metadata file at {metadata_path}") f = open(metadata_path, "w") f.write("overwritten with garbage!") f.close() @@ -52,17 +52,17 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder): (tenant2, timeline2, pg2) = tenant_timelines[2] timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant2, timeline2) for filename in os.listdir(timeline_path): - if filename.startswith('00000'): + if filename.startswith("00000"): # Looks like a layer file. Remove it - os.remove(f'{timeline_path}/{filename}') + os.remove(f"{timeline_path}/{filename}") # Corrupt layer files file on timeline 3 (tenant3, timeline3, pg3) = tenant_timelines[3] timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant3, timeline3) for filename in os.listdir(timeline_path): - if filename.startswith('00000'): + if filename.startswith("00000"): # Looks like a layer file. Corrupt it - f = open(f'{timeline_path}/{filename}', "w") + f = open(f"{timeline_path}/{filename}", "w") f.write("overwritten with garbage!") f.close() @@ -77,7 +77,7 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder): (tenant, timeline, pg) = tenant_timelines[n] with pytest.raises(Exception, match="Cannot load local timeline") as err: pg.start() - log.info(f'compute startup failed as expected: {err}') + log.info(f"compute startup failed as expected: {err}") def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv): @@ -87,9 +87,10 @@ def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv): with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: futures = [ - executor.submit(env.neon_cli.create_timeline, - f"test-create-multiple-timelines-{i}", - tenant_id) for i in range(4) + executor.submit( + env.neon_cli.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id + ) + for i in range(4) ] for future in futures: future.result() diff --git a/test_runner/batch_others/test_clog_truncate.py b/test_runner/batch_others/test_clog_truncate.py index cdb577f480..1f5df1c130 100644 --- a/test_runner/batch_others/test_clog_truncate.py +++ b/test_runner/batch_others/test_clog_truncate.py @@ -1,10 +1,9 @@ -import time import os - +import time from contextlib import closing -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv from fixtures.utils import query_scalar @@ -13,40 +12,40 @@ from fixtures.utils import query_scalar # def test_clog_truncate(neon_simple_env: NeonEnv): env = neon_simple_env - env.neon_cli.create_branch('test_clog_truncate', 'empty') + env.neon_cli.create_branch("test_clog_truncate", "empty") # set aggressive autovacuum to make sure that truncation will happen config = [ - 'autovacuum_max_workers=10', - 'autovacuum_vacuum_threshold=0', - 'autovacuum_vacuum_insert_threshold=0', - 'autovacuum_vacuum_cost_delay=0', - 'autovacuum_vacuum_cost_limit=10000', - 'autovacuum_naptime =1s', - 'autovacuum_freeze_max_age=100000' + "autovacuum_max_workers=10", + "autovacuum_vacuum_threshold=0", + "autovacuum_vacuum_insert_threshold=0", + "autovacuum_vacuum_cost_delay=0", + "autovacuum_vacuum_cost_limit=10000", + "autovacuum_naptime =1s", + "autovacuum_freeze_max_age=100000", ] - pg = env.postgres.create_start('test_clog_truncate', config_lines=config) - log.info('postgres is running on test_clog_truncate branch') + pg = env.postgres.create_start("test_clog_truncate", config_lines=config) + log.info("postgres is running on test_clog_truncate branch") # Install extension containing function needed for test - pg.safe_psql('CREATE EXTENSION neon_test_utils') + pg.safe_psql("CREATE EXTENSION neon_test_utils") # Consume many xids to advance clog with pg.cursor() as cur: - cur.execute('select test_consume_xids(1000*1000*10);') - log.info('xids consumed') + cur.execute("select test_consume_xids(1000*1000*10);") + log.info("xids consumed") # call a checkpoint to trigger TruncateSubtrans - cur.execute('CHECKPOINT;') + cur.execute("CHECKPOINT;") # ensure WAL flush - cur.execute('select txid_current()') + cur.execute("select txid_current()") log.info(cur.fetchone()) # wait for autovacuum to truncate the pg_xact # XXX Is it worth to add a timeout here? - pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), '0000') + pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), "0000") log.info(f"pg_xact_0000_path = {pg_xact_0000_path}") while os.path.isfile(pg_xact_0000_path): @@ -55,18 +54,18 @@ def test_clog_truncate(neon_simple_env: NeonEnv): # checkpoint to advance latest lsn with pg.cursor() as cur: - cur.execute('CHECKPOINT;') - lsn_after_truncation = query_scalar(cur, 'select pg_current_wal_insert_lsn()') + cur.execute("CHECKPOINT;") + lsn_after_truncation = query_scalar(cur, "select pg_current_wal_insert_lsn()") # create new branch after clog truncation and start a compute node on it - log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}') - env.neon_cli.create_branch('test_clog_truncate_new', - 'test_clog_truncate', - ancestor_start_lsn=lsn_after_truncation) - pg2 = env.postgres.create_start('test_clog_truncate_new') - log.info('postgres is running on test_clog_truncate_new branch') + log.info(f"create branch at lsn_after_truncation {lsn_after_truncation}") + env.neon_cli.create_branch( + "test_clog_truncate_new", "test_clog_truncate", ancestor_start_lsn=lsn_after_truncation + ) + pg2 = env.postgres.create_start("test_clog_truncate_new") + log.info("postgres is running on test_clog_truncate_new branch") # check that new node doesn't contain truncated segment - pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), '0000') + pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), "0000") log.info(f"pg_xact_0000_path_new = {pg_xact_0000_path_new}") assert os.path.isfile(pg_xact_0000_path_new) is False diff --git a/test_runner/batch_others/test_close_fds.py b/test_runner/batch_others/test_close_fds.py index 9521b1bb4a..c7ea37f9c8 100644 --- a/test_runner/batch_others/test_close_fds.py +++ b/test_runner/batch_others/test_close_fds.py @@ -1,18 +1,18 @@ -from contextlib import closing -import shutil -import time -import subprocess import os.path +import shutil +import subprocess +import time +from contextlib import closing from cached_property import threading -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv def lsof_path() -> str: path_output = shutil.which("lsof") if path_output is None: - raise RuntimeError('lsof not found in PATH') + raise RuntimeError("lsof not found in PATH") else: return path_output @@ -36,16 +36,18 @@ def test_lsof_pageserver_pid(neon_simple_env: NeonEnv): path = os.path.join(env.repo_dir, "pageserver.pid") lsof = lsof_path() while workload_thread.is_alive(): - res = subprocess.run([lsof, path], - check=False, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + res = subprocess.run( + [lsof, path], + check=False, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) # parse the `lsof` command's output to get only the list of commands - commands = [line.split(' ')[0] for line in res.stdout.strip().split('\n')[1:]] + commands = [line.split(" ")[0] for line in res.stdout.strip().split("\n")[1:]] if len(commands) > 0: log.info(f"lsof commands: {commands}") - assert commands == ['pageserve'] + assert commands == ["pageserve"] time.sleep(1.0) diff --git a/test_runner/batch_others/test_config.py b/test_runner/batch_others/test_config.py index 51deeebeed..3477d96b89 100644 --- a/test_runner/batch_others/test_config.py +++ b/test_runner/batch_others/test_config.py @@ -1,7 +1,7 @@ from contextlib import closing -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv # @@ -12,19 +12,21 @@ def test_config(neon_simple_env: NeonEnv): env.neon_cli.create_branch("test_config", "empty") # change config - pg = env.postgres.create_start('test_config', config_lines=['log_min_messages=debug1']) - log.info('postgres is running on test_config branch') + pg = env.postgres.create_start("test_config", config_lines=["log_min_messages=debug1"]) + log.info("postgres is running on test_config branch") with closing(pg.connect()) as conn: with conn.cursor() as cur: - cur.execute(''' + cur.execute( + """ SELECT setting FROM pg_settings WHERE source != 'default' AND source != 'override' AND name = 'log_min_messages' - ''') + """ + ) # check that config change was applied - assert cur.fetchone() == ('debug1', ) + assert cur.fetchone() == ("debug1",) diff --git a/test_runner/batch_others/test_crafted_wal_end.py b/test_runner/batch_others/test_crafted_wal_end.py index d1c46fc73a..32e5366945 100644 --- a/test_runner/batch_others/test_crafted_wal_end.py +++ b/test_runner/batch_others/test_crafted_wal_end.py @@ -1,34 +1,38 @@ -from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft -from fixtures.log_helper import log import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft # Restart nodes with WAL end having specially crafted shape, like last record # crossing segment boundary, to test decoding issues. -@pytest.mark.parametrize('wal_type', - [ - 'simple', - 'last_wal_record_xlog_switch', - 'last_wal_record_xlog_switch_ends_on_page_boundary', - 'last_wal_record_crossing_segment', - 'wal_record_crossing_segment_followed_by_small_one', - ]) +@pytest.mark.parametrize( + "wal_type", + [ + "simple", + "last_wal_record_xlog_switch", + "last_wal_record_xlog_switch_ends_on_page_boundary", + "last_wal_record_crossing_segment", + "wal_record_crossing_segment_followed_by_small_one", + ], +) def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): neon_env_builder.num_safekeepers = 1 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_crafted_wal_end') + env.neon_cli.create_branch("test_crafted_wal_end") - pg = env.postgres.create('test_crafted_wal_end') + pg = env.postgres.create("test_crafted_wal_end") wal_craft = WalCraft(env) pg.config(wal_craft.postgres_config()) pg.start() - res = pg.safe_psql_many(queries=[ - 'CREATE TABLE keys(key int primary key)', - 'INSERT INTO keys SELECT generate_series(1, 100)', - 'SELECT SUM(key) FROM keys' - ]) - assert res[-1][0] == (5050, ) + res = pg.safe_psql_many( + queries=[ + "CREATE TABLE keys(key int primary key)", + "INSERT INTO keys SELECT generate_series(1, 100)", + "SELECT SUM(key) FROM keys", + ] + ) + assert res[-1][0] == (5050,) wal_craft.in_existing(wal_type, pg.connstr()) @@ -39,13 +43,15 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): env.pageserver.start() log.info("Trying more queries") - res = pg.safe_psql_many(queries=[ - 'SELECT SUM(key) FROM keys', - 'INSERT INTO keys SELECT generate_series(101, 200)', - 'SELECT SUM(key) FROM keys', - ]) - assert res[0][0] == (5050, ) - assert res[-1][0] == (20100, ) + res = pg.safe_psql_many( + queries=[ + "SELECT SUM(key) FROM keys", + "INSERT INTO keys SELECT generate_series(101, 200)", + "SELECT SUM(key) FROM keys", + ] + ) + assert res[0][0] == (5050,) + assert res[-1][0] == (20100,) log.info("Restarting all safekeepers and pageservers (again)") env.pageserver.stop() @@ -54,10 +60,12 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): env.pageserver.start() log.info("Trying more queries (again)") - res = pg.safe_psql_many(queries=[ - 'SELECT SUM(key) FROM keys', - 'INSERT INTO keys SELECT generate_series(201, 300)', - 'SELECT SUM(key) FROM keys', - ]) - assert res[0][0] == (20100, ) - assert res[-1][0] == (45150, ) + res = pg.safe_psql_many( + queries=[ + "SELECT SUM(key) FROM keys", + "INSERT INTO keys SELECT generate_series(201, 300)", + "SELECT SUM(key) FROM keys", + ] + ) + assert res[0][0] == (20100,) + assert res[-1][0] == (45150,) diff --git a/test_runner/batch_others/test_createdropdb.py b/test_runner/batch_others/test_createdropdb.py index 0fbf6e2a47..fdb704ff15 100644 --- a/test_runner/batch_others/test_createdropdb.py +++ b/test_runner/batch_others/test_createdropdb.py @@ -1,9 +1,9 @@ import os import pathlib - from contextlib import closing -from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content + from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content from fixtures.utils import query_scalar @@ -12,35 +12,37 @@ from fixtures.utils import query_scalar # def test_createdb(neon_simple_env: NeonEnv): env = neon_simple_env - env.neon_cli.create_branch('test_createdb', 'empty') + env.neon_cli.create_branch("test_createdb", "empty") - pg = env.postgres.create_start('test_createdb') + pg = env.postgres.create_start("test_createdb") log.info("postgres is running on 'test_createdb' branch") with pg.cursor() as cur: # Cause a 'relmapper' change in the original branch - cur.execute('VACUUM FULL pg_class') + cur.execute("VACUUM FULL pg_class") - cur.execute('CREATE DATABASE foodb') + cur.execute("CREATE DATABASE foodb") - lsn = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()') + lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()") # Create a branch - env.neon_cli.create_branch('test_createdb2', 'test_createdb', ancestor_start_lsn=lsn) - pg2 = env.postgres.create_start('test_createdb2') + env.neon_cli.create_branch("test_createdb2", "test_createdb", ancestor_start_lsn=lsn) + pg2 = env.postgres.create_start("test_createdb2") # Test that you can connect to the new database on both branches for db in (pg, pg2): - with db.cursor(dbname='foodb') as cur: + with db.cursor(dbname="foodb") as cur: # Check database size in both branches - cur.execute(""" + cur.execute( + """ select pg_size_pretty(pg_database_size('foodb')), pg_size_pretty( sum(pg_relation_size(oid, 'main')) +sum(pg_relation_size(oid, 'vm')) +sum(pg_relation_size(oid, 'fsm')) ) FROM pg_class where relisshared is false - """) + """ + ) res = cur.fetchone() assert res is not None # check that dbsize equals sum of all relation sizes, excluding shared ones @@ -53,48 +55,48 @@ def test_createdb(neon_simple_env: NeonEnv): # def test_dropdb(neon_simple_env: NeonEnv, test_output_dir): env = neon_simple_env - env.neon_cli.create_branch('test_dropdb', 'empty') - pg = env.postgres.create_start('test_dropdb') + env.neon_cli.create_branch("test_dropdb", "empty") + pg = env.postgres.create_start("test_dropdb") log.info("postgres is running on 'test_dropdb' branch") with pg.cursor() as cur: - cur.execute('CREATE DATABASE foodb') + cur.execute("CREATE DATABASE foodb") - lsn_before_drop = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()') + lsn_before_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()") dboid = query_scalar(cur, "SELECT oid FROM pg_database WHERE datname='foodb';") with pg.cursor() as cur: - cur.execute('DROP DATABASE foodb') + cur.execute("DROP DATABASE foodb") - cur.execute('CHECKPOINT') + cur.execute("CHECKPOINT") - lsn_after_drop = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()') + lsn_after_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()") # Create two branches before and after database drop. - env.neon_cli.create_branch('test_before_dropdb', - 'test_dropdb', - ancestor_start_lsn=lsn_before_drop) - pg_before = env.postgres.create_start('test_before_dropdb') + env.neon_cli.create_branch( + "test_before_dropdb", "test_dropdb", ancestor_start_lsn=lsn_before_drop + ) + pg_before = env.postgres.create_start("test_before_dropdb") - env.neon_cli.create_branch('test_after_dropdb', - 'test_dropdb', - ancestor_start_lsn=lsn_after_drop) - pg_after = env.postgres.create_start('test_after_dropdb') + env.neon_cli.create_branch( + "test_after_dropdb", "test_dropdb", ancestor_start_lsn=lsn_after_drop + ) + pg_after = env.postgres.create_start("test_after_dropdb") # Test that database exists on the branch before drop - pg_before.connect(dbname='foodb').close() + pg_before.connect(dbname="foodb").close() # Test that database subdir exists on the branch before drop assert pg_before.pgdata_dir - dbpath = pathlib.Path(pg_before.pgdata_dir) / 'base' / str(dboid) + dbpath = pathlib.Path(pg_before.pgdata_dir) / "base" / str(dboid) log.info(dbpath) assert os.path.isdir(dbpath) == True # Test that database subdir doesn't exist on the branch after drop assert pg_after.pgdata_dir - dbpath = pathlib.Path(pg_after.pgdata_dir) / 'base' / str(dboid) + dbpath = pathlib.Path(pg_after.pgdata_dir) / "base" / str(dboid) log.info(dbpath) assert os.path.isdir(dbpath) == False diff --git a/test_runner/batch_others/test_createuser.py b/test_runner/batch_others/test_createuser.py index d48db05395..c5f8246f5b 100644 --- a/test_runner/batch_others/test_createuser.py +++ b/test_runner/batch_others/test_createuser.py @@ -1,5 +1,5 @@ -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv from fixtures.utils import query_scalar @@ -8,21 +8,21 @@ from fixtures.utils import query_scalar # def test_createuser(neon_simple_env: NeonEnv): env = neon_simple_env - env.neon_cli.create_branch('test_createuser', 'empty') - pg = env.postgres.create_start('test_createuser') + env.neon_cli.create_branch("test_createuser", "empty") + pg = env.postgres.create_start("test_createuser") log.info("postgres is running on 'test_createuser' branch") with pg.cursor() as cur: # Cause a 'relmapper' change in the original branch - cur.execute('CREATE USER testuser with password %s', ('testpwd', )) + cur.execute("CREATE USER testuser with password %s", ("testpwd",)) - cur.execute('CHECKPOINT') + cur.execute("CHECKPOINT") - lsn = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()') + lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()") # Create a branch - env.neon_cli.create_branch('test_createuser2', 'test_createuser', ancestor_start_lsn=lsn) - pg2 = env.postgres.create_start('test_createuser2') + env.neon_cli.create_branch("test_createuser2", "test_createuser", ancestor_start_lsn=lsn) + pg2 = env.postgres.create_start("test_createuser2") # Test that you can connect to new branch as a new user - assert pg2.safe_psql('select current_user', user='testuser') == [('testuser', )] + assert pg2.safe_psql("select current_user", user="testuser") == [("testuser",)] diff --git a/test_runner/batch_others/test_fsm_truncate.py b/test_runner/batch_others/test_fsm_truncate.py index 0f85942598..54ad2ffa34 100644 --- a/test_runner/batch_others/test_fsm_truncate.py +++ b/test_runner/batch_others/test_fsm_truncate.py @@ -1,11 +1,12 @@ +import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient -import pytest def test_fsm_truncate(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_fsm_truncate") - pg = env.postgres.create_start('test_fsm_truncate') + pg = env.postgres.create_start("test_fsm_truncate") pg.safe_psql( - 'CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;') + "CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;" + ) diff --git a/test_runner/batch_others/test_fullbackup.py b/test_runner/batch_others/test_fullbackup.py index bce085c157..8155f52060 100644 --- a/test_runner/batch_others/test_fullbackup.py +++ b/test_runner/batch_others/test_fullbackup.py @@ -1,22 +1,28 @@ -from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnvBuilder, PgBin, PortDistributor, VanillaPostgres -from fixtures.neon_fixtures import pg_distrib_dir import os + +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + NeonEnvBuilder, + PgBin, + PortDistributor, + VanillaPostgres, + pg_distrib_dir, +) from fixtures.utils import query_scalar, subprocess_capture num_rows = 1000 # Ensure that regular postgres can start from fullbackup -def test_fullbackup(neon_env_builder: NeonEnvBuilder, - pg_bin: PgBin, - port_distributor: PortDistributor): +def test_fullbackup( + neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, port_distributor: PortDistributor +): neon_env_builder.num_safekeepers = 1 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_fullbackup') - pgmain = env.postgres.create_start('test_fullbackup') + env.neon_cli.create_branch("test_fullbackup") + pgmain = env.postgres.create_start("test_fullbackup") log.info("postgres is running on 'test_fullbackup' branch") with pgmain.cursor() as cur: @@ -24,16 +30,18 @@ def test_fullbackup(neon_env_builder: NeonEnvBuilder, # data loading may take a while, so increase statement timeout cur.execute("SET statement_timeout='300s'") - cur.execute(f'''CREATE TABLE tbl AS SELECT 'long string to consume some space' || g - from generate_series(1,{num_rows}) g''') + cur.execute( + f"""CREATE TABLE tbl AS SELECT 'long string to consume some space' || g + from generate_series(1,{num_rows}) g""" + ) cur.execute("CHECKPOINT") - lsn = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()') + lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()") log.info(f"start_backup_lsn = {lsn}") # Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq. # PgBin sets it automatically, but here we need to pipe psql output to the tar command. - psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')} + psql_env = {"LD_LIBRARY_PATH": os.path.join(str(pg_distrib_dir), "lib")} # Get and unpack fullbackup from pageserver restored_dir_path = env.repo_dir / "restored_datadir" @@ -42,13 +50,14 @@ def test_fullbackup(neon_env_builder: NeonEnvBuilder, cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query] result_basepath = pg_bin.run_capture(cmd, env=psql_env) tar_output_file = result_basepath + ".stdout" - subprocess_capture(str(env.repo_dir), - ["tar", "-xf", tar_output_file, "-C", str(restored_dir_path)]) + subprocess_capture( + str(env.repo_dir), ["tar", "-xf", tar_output_file, "-C", str(restored_dir_path)] + ) # HACK # fullbackup returns neon specific pg_control and first WAL segment # use resetwal to overwrite it - pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, 'pg_resetwal') + pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, "pg_resetwal") cmd = [pg_resetwal_path, "-D", str(restored_dir_path)] pg_bin.run_capture(cmd, env=psql_env) @@ -56,9 +65,11 @@ def test_fullbackup(neon_env_builder: NeonEnvBuilder, port = port_distributor.get_port() with VanillaPostgres(restored_dir_path, pg_bin, port, init=False) as vanilla_pg: # TODO make port an optional argument - vanilla_pg.configure([ - f"port={port}", - ]) + vanilla_pg.configure( + [ + f"port={port}", + ] + ) vanilla_pg.start() - num_rows_found = vanilla_pg.safe_psql('select count(*) from tbl;', user="cloud_admin")[0][0] + num_rows_found = vanilla_pg.safe_psql("select count(*) from tbl;", user="cloud_admin")[0][0] assert num_rows == num_rows_found diff --git a/test_runner/batch_others/test_gc_aggressive.py b/test_runner/batch_others/test_gc_aggressive.py index d7f6308182..be6b437e30 100644 --- a/test_runner/batch_others/test_gc_aggressive.py +++ b/test_runner/batch_others/test_gc_aggressive.py @@ -1,8 +1,8 @@ import asyncio import random -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres from fixtures.utils import query_scalar # Test configuration @@ -24,7 +24,7 @@ async def update_table(pg: Postgres): while updates_performed < updates_to_perform: updates_performed += 1 id = random.randrange(1, num_rows) - row = await pg_conn.fetchrow(f'UPDATE foo SET counter = counter + 1 WHERE id = {id}') + row = await pg_conn.fetchrow(f"UPDATE foo SET counter = counter + 1 WHERE id = {id}") # Perform aggressive GC with 0 horizon @@ -57,24 +57,26 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder): neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" env = neon_env_builder.init_start() env.neon_cli.create_branch("test_gc_aggressive", "main") - pg = env.postgres.create_start('test_gc_aggressive') - log.info('postgres is running on test_gc_aggressive branch') + pg = env.postgres.create_start("test_gc_aggressive") + log.info("postgres is running on test_gc_aggressive branch") with pg.cursor() as cur: timeline = query_scalar(cur, "SHOW neon.timeline_id") # Create table, and insert the first 100 rows - cur.execute('CREATE TABLE foo (id int, counter int, t text)') - cur.execute(f''' + cur.execute("CREATE TABLE foo (id int, counter int, t text)") + cur.execute( + f""" INSERT INTO foo SELECT g, 0, 'long string to consume some space' || g FROM generate_series(1, {num_rows}) g - ''') - cur.execute('CREATE INDEX ON foo(id)') + """ + ) + cur.execute("CREATE INDEX ON foo(id)") asyncio.run(update_and_gc(env, pg, timeline)) - cur.execute('SELECT COUNT(*), SUM(counter) FROM foo') + cur.execute("SELECT COUNT(*), SUM(counter) FROM foo") r = cur.fetchone() assert r is not None assert r == (num_rows, updates_to_perform) diff --git a/test_runner/batch_others/test_import.py b/test_runner/batch_others/test_import.py index 039945e5e4..a2671727f7 100644 --- a/test_runner/batch_others/test_import.py +++ b/test_runner/batch_others/test_import.py @@ -1,17 +1,24 @@ -import re -import pytest -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, PgBin, Postgres, wait_for_upload, wait_for_last_record_lsn -from fixtures.utils import lsn_from_hex -from uuid import UUID, uuid4 -import os -import tarfile -import shutil -from pathlib import Path import json -from fixtures.utils import subprocess_capture -from fixtures.log_helper import log +import os +import re +import shutil +import tarfile from contextlib import closing -from fixtures.neon_fixtures import pg_distrib_dir +from pathlib import Path +from uuid import UUID, uuid4 + +import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + NeonEnv, + NeonEnvBuilder, + PgBin, + Postgres, + pg_distrib_dir, + wait_for_last_record_lsn, + wait_for_upload, +) +from fixtures.utils import lsn_from_hex, subprocess_capture @pytest.mark.timeout(600) @@ -19,9 +26,11 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build # Put data in vanilla pg vanilla_pg.start() vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser") - vanilla_pg.safe_psql('''create table t as select 'long string to consume some space' || g - from generate_series(1,300000) g''') - assert vanilla_pg.safe_psql('select count(*) from t') == [(300000, )] + vanilla_pg.safe_psql( + """create table t as select 'long string to consume some space' || g + from generate_series(1,300000) g""" + ) + assert vanilla_pg.safe_psql("select count(*) from t") == [(300000,)] # Take basebackup basebackup_dir = os.path.join(test_output_dir, "basebackup") @@ -29,15 +38,17 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build wal_tar = os.path.join(basebackup_dir, "pg_wal.tar") os.mkdir(basebackup_dir) vanilla_pg.safe_psql("CHECKPOINT") - pg_bin.run([ - "pg_basebackup", - "-F", - "tar", - "-d", - vanilla_pg.connstr(), - "-D", - basebackup_dir, - ]) + pg_bin.run( + [ + "pg_basebackup", + "-F", + "tar", + "-d", + vanilla_pg.connstr(), + "-D", + basebackup_dir, + ] + ) # Make corrupt base tar with missing pg_control unpacked_base = os.path.join(basebackup_dir, "unpacked-base") @@ -45,9 +56,11 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build os.mkdir(unpacked_base, 0o750) subprocess_capture(str(test_output_dir), ["tar", "-xf", base_tar, "-C", unpacked_base]) os.remove(os.path.join(unpacked_base, "global/pg_control")) - subprocess_capture(str(test_output_dir), - ["tar", "-cf", "corrupt-base.tar"] + os.listdir(unpacked_base), - cwd=unpacked_base) + subprocess_capture( + str(test_output_dir), + ["tar", "-cf", "corrupt-base.tar"] + os.listdir(unpacked_base), + cwd=unpacked_base, + ) # Get start_lsn and end_lsn with open(os.path.join(basebackup_dir, "backup_manifest")) as f: @@ -65,24 +78,26 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build env.pageserver.http_client().tenant_create(tenant) def import_tar(base, wal): - env.neon_cli.raw_cli([ - "timeline", - "import", - "--tenant-id", - tenant.hex, - "--timeline-id", - timeline.hex, - "--node-name", - node_name, - "--base-lsn", - start_lsn, - "--base-tarfile", - base, - "--end-lsn", - end_lsn, - "--wal-tarfile", - wal, - ]) + env.neon_cli.raw_cli( + [ + "timeline", + "import", + "--tenant-id", + tenant.hex, + "--timeline-id", + timeline.hex, + "--node-name", + node_name, + "--base-lsn", + start_lsn, + "--base-tarfile", + base, + "--end-lsn", + end_lsn, + "--wal-tarfile", + wal, + ] + ) # Importing corrupt backup fails with pytest.raises(Exception): @@ -102,7 +117,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build # Check it worked pg = env.postgres.create_start(node_name, tenant_id=tenant) - assert pg.safe_psql('select count(*) from t') == [(300000, )] + assert pg.safe_psql("select count(*) from t") == [(300000,)] @pytest.mark.timeout(600) @@ -111,8 +126,8 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu neon_env_builder.enable_local_fs_remote_storage() env = neon_env_builder.init_start() - timeline = env.neon_cli.create_branch('test_import_from_pageserver_small') - pg = env.postgres.create_start('test_import_from_pageserver_small') + timeline = env.neon_cli.create_branch("test_import_from_pageserver_small") + pg = env.postgres.create_start("test_import_from_pageserver_small") num_rows = 3000 lsn = _generate_data(num_rows, pg) @@ -129,8 +144,8 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne neon_env_builder.enable_local_fs_remote_storage() env = neon_env_builder.init_start() - timeline = env.neon_cli.create_branch('test_import_from_pageserver_multisegment') - pg = env.postgres.create_start('test_import_from_pageserver_multisegment') + timeline = env.neon_cli.create_branch("test_import_from_pageserver_multisegment") + pg = env.postgres.create_start("test_import_from_pageserver_multisegment") # For `test_import_from_pageserver_multisegment`, we want to make sure that the data # is large enough to create multi-segment files. Typically, a segment file's size is @@ -139,8 +154,9 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne num_rows = 30000000 lsn = _generate_data(num_rows, pg) - logical_size = env.pageserver.http_client().timeline_detail( - env.initial_tenant, timeline)['local']['current_logical_size'] + logical_size = env.pageserver.http_client().timeline_detail(env.initial_tenant, timeline)[ + "local" + ]["current_logical_size"] log.info(f"timeline logical size = {logical_size / (1024 ** 2)}MB") assert logical_size > 1024**3 # = 1GB @@ -148,7 +164,7 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne # Check if the backup data contains multiple segment files cnt_seg_files = 0 - segfile_re = re.compile('[0-9]+\\.[0-9]+') + segfile_re = re.compile("[0-9]+\\.[0-9]+") with tarfile.open(tar_output_file, "r") as tar_f: for f in tar_f.getnames(): if segfile_re.search(f) is not None: @@ -166,11 +182,13 @@ def _generate_data(num_rows: int, pg: Postgres) -> str: with conn.cursor() as cur: # data loading may take a while, so increase statement timeout cur.execute("SET statement_timeout='300s'") - cur.execute(f'''CREATE TABLE tbl AS SELECT 'long string to consume some space' || g - from generate_series(1,{num_rows}) g''') + cur.execute( + f"""CREATE TABLE tbl AS SELECT 'long string to consume some space' || g + from generate_series(1,{num_rows}) g""" + ) cur.execute("CHECKPOINT") - cur.execute('SELECT pg_current_wal_insert_lsn()') + cur.execute("SELECT pg_current_wal_insert_lsn()") res = cur.fetchone() assert res is not None and isinstance(res[0], str) return res[0] @@ -189,7 +207,7 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel # Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq. # PgBin sets it automatically, but here we need to pipe psql output to the tar command. - psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')} + psql_env = {"LD_LIBRARY_PATH": os.path.join(str(pg_distrib_dir), "lib")} # Get a fullbackup from pageserver query = f"fullbackup { env.initial_tenant.hex} {timeline.hex} {lsn}" @@ -201,11 +219,11 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel env.postgres.stop_all() env.pageserver.stop() - dir_to_clear = Path(env.repo_dir) / 'tenants' + dir_to_clear = Path(env.repo_dir) / "tenants" shutil.rmtree(dir_to_clear) os.mkdir(dir_to_clear) - #start the pageserver again + # start the pageserver again env.pageserver.start() # Import using another tenantid, because we use the same pageserver. @@ -216,20 +234,22 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel node_name = "import_from_pageserver" client = env.pageserver.http_client() client.tenant_create(tenant) - env.neon_cli.raw_cli([ - "timeline", - "import", - "--tenant-id", - tenant.hex, - "--timeline-id", - timeline.hex, - "--node-name", - node_name, - "--base-lsn", - lsn, - "--base-tarfile", - os.path.join(tar_output_file), - ]) + env.neon_cli.raw_cli( + [ + "timeline", + "import", + "--tenant-id", + tenant.hex, + "--timeline-id", + timeline.hex, + "--node-name", + node_name, + "--base-lsn", + lsn, + "--base-tarfile", + os.path.join(tar_output_file), + ] + ) # Wait for data to land in s3 wait_for_last_record_lsn(client, tenant, timeline, lsn_from_hex(lsn)) @@ -237,7 +257,7 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel # Check it worked pg = env.postgres.create_start(node_name, tenant_id=tenant) - assert pg.safe_psql('select count(*) from tbl') == [(expected_num_rows, )] + assert pg.safe_psql("select count(*) from tbl") == [(expected_num_rows,)] # Take another fullbackup query = f"fullbackup { tenant.hex} {timeline.hex} {lsn}" diff --git a/test_runner/batch_others/test_large_schema.py b/test_runner/batch_others/test_large_schema.py index 18ae0614a9..f14265f6fd 100644 --- a/test_runner/batch_others/test_large_schema.py +++ b/test_runner/batch_others/test_large_schema.py @@ -1,7 +1,8 @@ -import time import os -from fixtures.neon_fixtures import NeonEnvBuilder +import time + from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder # This test creates large number of tables which cause large catalog. @@ -14,7 +15,7 @@ from fixtures.log_helper import log def test_large_schema(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - pg = env.postgres.create_start('main') + pg = env.postgres.create_start("main") conn = pg.connect() cur = conn.cursor() @@ -22,7 +23,7 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): tables = 2 # 10 is too much for debug build partitions = 1000 for i in range(1, tables + 1): - print(f'iteration {i} / {tables}') + print(f"iteration {i} / {tables}") # Restart compute. Restart is actually not strictly needed. # It is done mostly because this test originally tries to model the problem reported by Ketteq. @@ -52,10 +53,10 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): # It's normal that it takes some time for the pageserver to # restart, and for the connection to fail until it does. It # should eventually recover, so retry until it succeeds. - print(f'failed: {error}') + print(f"failed: {error}") if retries < max_retries: retries += 1 - print(f'retry {retries} / {max_retries}') + print(f"retry {retries} / {max_retries}") time.sleep(retry_sleep) continue else: @@ -67,7 +68,7 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): for i in range(1, tables + 1): cur.execute(f"SELECT count(*) FROM t_{i}") - assert cur.fetchone() == (partitions, ) + assert cur.fetchone() == (partitions,) cur.execute("set enable_sort=off") cur.execute("select * from pg_depend order by refclassid, refobjid, refobjsubid") @@ -77,6 +78,6 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant_id, timeline_id) for filename in os.listdir(timeline_path): - if filename.startswith('00000'): - log.info(f'layer {filename} size is {os.path.getsize(timeline_path + filename)}') + if filename.startswith("00000"): + log.info(f"layer {filename} size is {os.path.getsize(timeline_path + filename)}") assert os.path.getsize(timeline_path + filename) < 512_000_000 diff --git a/test_runner/batch_others/test_lsn_mapping.py b/test_runner/batch_others/test_lsn_mapping.py index d8b207135e..4db6951b42 100644 --- a/test_runner/batch_others/test_lsn_mapping.py +++ b/test_runner/batch_others/test_lsn_mapping.py @@ -1,13 +1,13 @@ +import math +import time from contextlib import closing from datetime import timedelta, timezone, tzinfo -import math from uuid import UUID -import psycopg2.extras -import psycopg2.errors -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres -from fixtures.log_helper import log -import time +import psycopg2.errors +import psycopg2.extras +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres from fixtures.utils import query_scalar @@ -18,7 +18,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 1 env = neon_env_builder.init_start() - new_timeline_id = env.neon_cli.create_branch('test_lsn_mapping') + new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping") pgmain = env.postgres.create_start("test_lsn_mapping") log.info("postgres is running on 'test_lsn_mapping' branch") @@ -35,7 +35,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): for i in range(1000): cur.execute(f"INSERT INTO foo VALUES({i})") # Get the timestamp at UTC - after_timestamp = query_scalar(cur, 'SELECT clock_timestamp()').replace(tzinfo=None) + after_timestamp = query_scalar(cur, "SELECT clock_timestamp()").replace(tzinfo=None) tbl.append([i, after_timestamp]) # Execute one more transaction with synchronous_commit enabled, to flush @@ -47,17 +47,17 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): probe_timestamp = tbl[-1][1] + timedelta(hours=1) result = query_scalar( ps_cur, - f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'" + f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'", ) - assert result == 'future' + assert result == "future" # timestamp too the far history probe_timestamp = tbl[0][1] - timedelta(hours=10) result = query_scalar( ps_cur, - f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'" + f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'", ) - assert result == 'past' + assert result == "past" # Probe a bunch of timestamps in the valid range for i in range(1, len(tbl), 100): @@ -66,14 +66,14 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): # Call get_lsn_by_timestamp to get the LSN lsn = query_scalar( ps_cur, - f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'" + f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'", ) # Launch a new read-only node at that LSN, and check that only the rows # that were supposed to be committed at that point in time are visible. - pg_here = env.postgres.create_start(branch_name='test_lsn_mapping', - node_name='test_lsn_mapping_read', - lsn=lsn) + pg_here = env.postgres.create_start( + branch_name="test_lsn_mapping", node_name="test_lsn_mapping_read", lsn=lsn + ) assert pg_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i pg_here.stop_and_destroy() diff --git a/test_runner/batch_others/test_multixact.py b/test_runner/batch_others/test_multixact.py index dd00066092..635beb16b7 100644 --- a/test_runner/batch_others/test_multixact.py +++ b/test_runner/batch_others/test_multixact.py @@ -1,5 +1,5 @@ -from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content from fixtures.utils import query_scalar @@ -11,18 +11,21 @@ from fixtures.utils import query_scalar # def test_multixact(neon_simple_env: NeonEnv, test_output_dir): env = neon_simple_env - env.neon_cli.create_branch('test_multixact', 'empty') - pg = env.postgres.create_start('test_multixact') + env.neon_cli.create_branch("test_multixact", "empty") + pg = env.postgres.create_start("test_multixact") log.info("postgres is running on 'test_multixact' branch") cur = pg.connect().cursor() - cur.execute(''' + cur.execute( + """ CREATE TABLE t1(i int primary key); INSERT INTO t1 select * from generate_series(1, 100); - ''') + """ + ) - next_multixact_id_old = query_scalar(cur, - 'SELECT next_multixact_id FROM pg_control_checkpoint()') + next_multixact_id_old = query_scalar( + cur, "SELECT next_multixact_id FROM pg_control_checkpoint()" + ) # Lock entries using parallel connections in a round-robin fashion. nclients = 20 @@ -40,17 +43,18 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir): for i in range(5000): conn = connections[i % nclients] conn.commit() - conn.cursor().execute('select * from t1 for key share') + conn.cursor().execute("select * from t1 for key share") # We have multixacts now. We can close the connections. for c in connections: c.close() # force wal flush - cur.execute('checkpoint') + cur.execute("checkpoint") cur.execute( - 'SELECT next_multixact_id, pg_current_wal_insert_lsn() FROM pg_control_checkpoint()') + "SELECT next_multixact_id, pg_current_wal_insert_lsn() FROM pg_control_checkpoint()" + ) res = cur.fetchone() assert res is not None next_multixact_id = res[0] @@ -60,12 +64,13 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir): assert int(next_multixact_id) > int(next_multixact_id_old) # Branch at this point - env.neon_cli.create_branch('test_multixact_new', 'test_multixact', ancestor_start_lsn=lsn) - pg_new = env.postgres.create_start('test_multixact_new') + env.neon_cli.create_branch("test_multixact_new", "test_multixact", ancestor_start_lsn=lsn) + pg_new = env.postgres.create_start("test_multixact_new") log.info("postgres is running on 'test_multixact_new' branch") next_multixact_id_new = pg_new.safe_psql( - 'SELECT next_multixact_id FROM pg_control_checkpoint()')[0][0] + "SELECT next_multixact_id FROM pg_control_checkpoint()" + )[0][0] # Check that we restored pg_controlfile correctly assert next_multixact_id_new == next_multixact_id diff --git a/test_runner/batch_others/test_neon_cli.py b/test_runner/batch_others/test_neon_cli.py index 728bc7b894..1acfa72127 100644 --- a/test_runner/batch_others/test_neon_cli.py +++ b/test_runner/batch_others/test_neon_cli.py @@ -1,21 +1,29 @@ import uuid -import requests - -from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient from typing import cast +import requests +from fixtures.neon_fixtures import ( + DEFAULT_BRANCH_NAME, + NeonEnv, + NeonEnvBuilder, + NeonPageserverHttpClient, +) -def helper_compare_timeline_list(pageserver_http_client: NeonPageserverHttpClient, - env: NeonEnv, - initial_tenant: uuid.UUID): + +def helper_compare_timeline_list( + pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv, initial_tenant: uuid.UUID +): """ Compare timelines list returned by CLI and directly via API. Filters out timelines created by other tests. """ timelines_api = sorted( - map(lambda t: cast(str, t['timeline_id']), - pageserver_http_client.timeline_list(initial_tenant))) + map( + lambda t: cast(str, t["timeline_id"]), + pageserver_http_client.timeline_list(initial_tenant), + ) + ) timelines_cli = env.neon_cli.list_timelines() assert timelines_cli == env.neon_cli.list_timelines(initial_tenant) @@ -32,12 +40,13 @@ def test_cli_timeline_list(neon_simple_env: NeonEnv): helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant) # Create a branch for us - main_timeline_id = env.neon_cli.create_branch('test_cli_branch_list_main') + main_timeline_id = env.neon_cli.create_branch("test_cli_branch_list_main") helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant) # Create a nested branch - nested_timeline_id = env.neon_cli.create_branch('test_cli_branch_list_nested', - 'test_cli_branch_list_main') + nested_timeline_id = env.neon_cli.create_branch( + "test_cli_branch_list_nested", "test_cli_branch_list_main" + ) helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant) # Check that all new branches are visible via CLI @@ -49,7 +58,7 @@ def test_cli_timeline_list(neon_simple_env: NeonEnv): def helper_compare_tenant_list(pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv): tenants = pageserver_http_client.tenant_list() - tenants_api = sorted(map(lambda t: cast(str, t['id']), tenants)) + tenants_api = sorted(map(lambda t: cast(str, t["id"]), tenants)) res = env.neon_cli.list_tenants() tenants_cli = sorted(map(lambda t: t.split()[0], res.stdout.splitlines())) @@ -97,7 +106,7 @@ def test_cli_ipv4_listeners(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() # Connect to sk port on v4 loopback - res = requests.get(f'http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status') + res = requests.get(f"http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status") assert res.ok # FIXME Test setup is using localhost:xx in ps config. diff --git a/test_runner/batch_others/test_next_xid.py b/test_runner/batch_others/test_next_xid.py index f8d11a9381..698ea0e1d3 100644 --- a/test_runner/batch_others/test_next_xid.py +++ b/test_runner/batch_others/test_next_xid.py @@ -8,15 +8,15 @@ from fixtures.neon_fixtures import NeonEnvBuilder def test_next_xid(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - pg = env.postgres.create_start('main') + pg = env.postgres.create_start("main") conn = pg.connect() cur = conn.cursor() - cur.execute('CREATE TABLE t(x integer)') + cur.execute("CREATE TABLE t(x integer)") iterations = 32 for i in range(1, iterations + 1): - print(f'iteration {i} / {iterations}') + print(f"iteration {i} / {iterations}") # Kill and restart the pageserver. pg.stop() @@ -38,10 +38,10 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder): # It's normal that it takes some time for the pageserver to # restart, and for the connection to fail until it does. It # should eventually recover, so retry until it succeeds. - print(f'failed: {error}') + print(f"failed: {error}") if retries < max_retries: retries += 1 - print(f'retry {retries} / {max_retries}') + print(f"retry {retries} / {max_retries}") time.sleep(retry_sleep) continue else: @@ -51,4 +51,4 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder): conn = pg.connect() cur = conn.cursor() cur.execute("SELECT count(*) FROM t") - assert cur.fetchone() == (iterations, ) + assert cur.fetchone() == (iterations,) diff --git a/test_runner/batch_others/test_normal_work.py b/test_runner/batch_others/test_normal_work.py index 5b25691517..002d697288 100644 --- a/test_runner/batch_others/test_normal_work.py +++ b/test_runner/batch_others/test_normal_work.py @@ -1,33 +1,35 @@ +import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient -import pytest def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient): tenant_id, timeline_id = env.neon_cli.create_tenant() - pg = env.postgres.create_start('main', tenant_id=tenant_id) + pg = env.postgres.create_start("main", tenant_id=tenant_id) # we rely upon autocommit after each statement - res_1 = pg.safe_psql_many(queries=[ - 'CREATE TABLE t(key int primary key, value text)', - 'INSERT INTO t SELECT generate_series(1,100000), \'payload\'', - 'SELECT sum(key) FROM t', - ]) + res_1 = pg.safe_psql_many( + queries=[ + "CREATE TABLE t(key int primary key, value text)", + "INSERT INTO t SELECT generate_series(1,100000), 'payload'", + "SELECT sum(key) FROM t", + ] + ) - assert res_1[-1][0] == (5000050000, ) + assert res_1[-1][0] == (5000050000,) # TODO check detach on live instance log.info("stopping compute") pg.stop() log.info("compute stopped") pg.start() - res_2 = pg.safe_psql('SELECT sum(key) FROM t') - assert res_2[0] == (5000050000, ) + res_2 = pg.safe_psql("SELECT sum(key) FROM t") + assert res_2[0] == (5000050000,) pg.stop() pageserver_http.tenant_detach(tenant_id) -@pytest.mark.parametrize('num_timelines,num_safekeepers', [(3, 1)]) +@pytest.mark.parametrize("num_timelines,num_safekeepers", [(3, 1)]) def test_normal_work(neon_env_builder: NeonEnvBuilder, num_timelines: int, num_safekeepers: int): """ Basic test: diff --git a/test_runner/batch_others/test_old_request_lsn.py b/test_runner/batch_others/test_old_request_lsn.py index 78a936af19..257913ef3f 100644 --- a/test_runner/batch_others/test_old_request_lsn.py +++ b/test_runner/batch_others/test_old_request_lsn.py @@ -1,7 +1,7 @@ -from fixtures.neon_fixtures import NeonEnvBuilder -from fixtures.log_helper import log -from fixtures.utils import print_gc_result, query_scalar import psycopg2.extras +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.utils import print_gc_result, query_scalar # @@ -19,8 +19,8 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder): neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" env = neon_env_builder.init_start() env.neon_cli.create_branch("test_old_request_lsn", "main") - pg = env.postgres.create_start('test_old_request_lsn') - log.info('postgres is running on test_old_request_lsn branch') + pg = env.postgres.create_start("test_old_request_lsn") + log.info("postgres is running on test_old_request_lsn branch") pg_conn = pg.connect() cur = pg_conn.cursor() @@ -33,25 +33,29 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder): # Create table, and insert some rows. Make it big enough that it doesn't fit in # shared_buffers. - cur.execute('CREATE TABLE foo (id int4 PRIMARY KEY, val int, t text)') - cur.execute(''' + cur.execute("CREATE TABLE foo (id int4 PRIMARY KEY, val int, t text)") + cur.execute( + """ INSERT INTO foo SELECT g, 1, 'long string to consume some space' || g FROM generate_series(1, 100000) g - ''') + """ + ) # Verify that the table is larger than shared_buffers, so that the SELECT below # will cause GetPage requests. - cur.execute(''' + cur.execute( + """ select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_ize from pg_settings where name = 'shared_buffers' - ''') + """ + ) row = cur.fetchone() assert row is not None - log.info(f'shared_buffers is {row[0]}, table size {row[1]}') + log.info(f"shared_buffers is {row[0]}, table size {row[1]}") assert int(row[0]) < int(row[1]) - cur.execute('VACUUM foo') + cur.execute("VACUUM foo") # Make a lot of updates on a single row, generating a lot of WAL. Trigger # garbage collections so that the page server will remove old page versions. @@ -61,7 +65,7 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder): print_gc_result(row) for j in range(100): - cur.execute('UPDATE foo SET val = val + 1 WHERE id = 1;') + cur.execute("UPDATE foo SET val = val + 1 WHERE id = 1;") # All (or at least most of) the updates should've been on the same page, so # that we haven't had to evict any dirty pages for a long time. Now run diff --git a/test_runner/batch_others/test_pageserver_api.py b/test_runner/batch_others/test_pageserver_api.py index 710b220ae8..5d7619c1b2 100644 --- a/test_runner/batch_others/test_pageserver_api.py +++ b/test_runner/batch_others/test_pageserver_api.py @@ -1,54 +1,65 @@ -from typing import Optional -from uuid import uuid4, UUID -import pytest -import pathlib import os +import pathlib import subprocess -from fixtures.utils import lsn_from_hex +from typing import Optional +from uuid import UUID, uuid4 + +import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( DEFAULT_BRANCH_NAME, NeonEnv, NeonEnvBuilder, - NeonPageserverHttpClient, NeonPageserverApiException, - wait_until, + NeonPageserverHttpClient, neon_binpath, pg_distrib_dir, + wait_until, ) +from fixtures.utils import lsn_from_hex # test that we cannot override node id after init def test_pageserver_init_node_id(neon_simple_env: NeonEnv): repo_dir = neon_simple_env.repo_dir - pageserver_config = repo_dir / 'pageserver.toml' - pageserver_bin = pathlib.Path(neon_binpath) / 'pageserver' - run_pageserver = lambda args: subprocess.run([str(pageserver_bin), '-D', str(repo_dir), *args], - check=False, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + pageserver_config = repo_dir / "pageserver.toml" + pageserver_bin = pathlib.Path(neon_binpath) / "pageserver" + run_pageserver = lambda args: subprocess.run( + [str(pageserver_bin), "-D", str(repo_dir), *args], + check=False, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) # remove initial config pageserver_config.unlink() - bad_init = run_pageserver(['--init', '-c', f'pg_distrib_dir="{pg_distrib_dir}"']) - assert bad_init.returncode == 1, 'pageserver should not be able to init new config without the node id' + bad_init = run_pageserver(["--init", "-c", f'pg_distrib_dir="{pg_distrib_dir}"']) + assert ( + bad_init.returncode == 1 + ), "pageserver should not be able to init new config without the node id" assert "missing id" in bad_init.stderr - assert not pageserver_config.exists(), 'config file should not be created after init error' + assert not pageserver_config.exists(), "config file should not be created after init error" completed_init = run_pageserver( - ['--init', '-c', 'id = 12345', '-c', f'pg_distrib_dir="{pg_distrib_dir}"']) - assert completed_init.returncode == 0, 'pageserver should be able to create a new config with the node id given' - assert pageserver_config.exists(), 'config file should be created successfully' + ["--init", "-c", "id = 12345", "-c", f'pg_distrib_dir="{pg_distrib_dir}"'] + ) + assert ( + completed_init.returncode == 0 + ), "pageserver should be able to create a new config with the node id given" + assert pageserver_config.exists(), "config file should be created successfully" bad_reinit = run_pageserver( - ['--init', '-c', 'id = 12345', '-c', f'pg_distrib_dir="{pg_distrib_dir}"']) - assert bad_reinit.returncode == 1, 'pageserver should not be able to init new config without the node id' + ["--init", "-c", "id = 12345", "-c", f'pg_distrib_dir="{pg_distrib_dir}"'] + ) + assert ( + bad_reinit.returncode == 1 + ), "pageserver should not be able to init new config without the node id" assert "already exists, cannot init it" in bad_reinit.stderr - bad_update = run_pageserver(['--update-config', '-c', 'id = 3']) - assert bad_update.returncode == 1, 'pageserver should not allow updating node id' + bad_update = run_pageserver(["--update-config", "-c", "id = 3"]) + assert bad_update.returncode == 1, "pageserver should not allow updating node id" assert "has node id already, it cannot be overridden" in bad_update.stderr @@ -56,12 +67,12 @@ def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID): client.check_status() # check initial tenant is there - assert initial_tenant.hex in {t['id'] for t in client.tenant_list()} + assert initial_tenant.hex in {t["id"] for t in client.tenant_list()} # create new tenant and check it is also there tenant_id = uuid4() client.tenant_create(tenant_id) - assert tenant_id.hex in {t['id'] for t in client.tenant_list()} + assert tenant_id.hex in {t["id"] for t in client.tenant_list()} timelines = client.timeline_list(tenant_id) assert len(timelines) == 0, "initial tenant should not have any timelines" @@ -74,19 +85,21 @@ def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID): assert len(timelines) > 0 # check it is there - assert timeline_id.hex in {b['timeline_id'] for b in client.timeline_list(tenant_id)} + assert timeline_id.hex in {b["timeline_id"] for b in client.timeline_list(tenant_id)} for timeline in timelines: - timeline_id_str = str(timeline['timeline_id']) - timeline_details = client.timeline_detail(tenant_id=tenant_id, - timeline_id=UUID(timeline_id_str), - include_non_incremental_logical_size=True) + timeline_id_str = str(timeline["timeline_id"]) + timeline_details = client.timeline_detail( + tenant_id=tenant_id, + timeline_id=UUID(timeline_id_str), + include_non_incremental_logical_size=True, + ) - assert timeline_details['tenant_id'] == tenant_id.hex - assert timeline_details['timeline_id'] == timeline_id_str + assert timeline_details["tenant_id"] == tenant_id.hex + assert timeline_details["timeline_id"] == timeline_id_str - local_timeline_details = timeline_details.get('local') + local_timeline_details = timeline_details.get("local") assert local_timeline_details is not None - assert local_timeline_details['timeline_state'] == 'Loaded' + assert local_timeline_details["timeline_state"] == "Loaded" def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv): @@ -94,32 +107,43 @@ def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv): with env.pageserver.http_client() as client: tenant_id, timeline_id = env.neon_cli.create_tenant() - timeline_details = client.timeline_detail(tenant_id=tenant_id, - timeline_id=timeline_id, - include_non_incremental_logical_size=True) + timeline_details = client.timeline_detail( + tenant_id=tenant_id, timeline_id=timeline_id, include_non_incremental_logical_size=True + ) - assert timeline_details.get('wal_source_connstr') is None, 'Should not be able to connect to WAL streaming without PG compute node running' - assert timeline_details.get('last_received_msg_lsn') is None, 'Should not be able to connect to WAL streaming without PG compute node running' - assert timeline_details.get('last_received_msg_ts') is None, 'Should not be able to connect to WAL streaming without PG compute node running' + assert ( + timeline_details.get("wal_source_connstr") is None + ), "Should not be able to connect to WAL streaming without PG compute node running" + assert ( + timeline_details.get("last_received_msg_lsn") is None + ), "Should not be able to connect to WAL streaming without PG compute node running" + assert ( + timeline_details.get("last_received_msg_ts") is None + ), "Should not be able to connect to WAL streaming without PG compute node running" -def expect_updated_msg_lsn(client: NeonPageserverHttpClient, - tenant_id: UUID, - timeline_id: UUID, - prev_msg_lsn: Optional[int]) -> int: +def expect_updated_msg_lsn( + client: NeonPageserverHttpClient, + tenant_id: UUID, + timeline_id: UUID, + prev_msg_lsn: Optional[int], +) -> int: timeline_details = client.timeline_detail(tenant_id, timeline_id=timeline_id) # a successful `timeline_details` response must contain the below fields - local_timeline_details = timeline_details['local'] + local_timeline_details = timeline_details["local"] assert "wal_source_connstr" in local_timeline_details.keys() assert "last_received_msg_lsn" in local_timeline_details.keys() assert "last_received_msg_ts" in local_timeline_details.keys() - assert local_timeline_details["last_received_msg_lsn"] is not None, "the last received message's LSN is empty" + assert ( + local_timeline_details["last_received_msg_lsn"] is not None + ), "the last received message's LSN is empty" last_msg_lsn = lsn_from_hex(local_timeline_details["last_received_msg_lsn"]) - assert prev_msg_lsn is None or prev_msg_lsn < last_msg_lsn, \ - f"the last received message's LSN {last_msg_lsn} hasn't been updated \ + assert ( + prev_msg_lsn is None or prev_msg_lsn < last_msg_lsn + ), f"the last received message's LSN {last_msg_lsn} hasn't been updated \ compared to the previous message's LSN {prev_msg_lsn}" return last_msg_lsn @@ -139,15 +163,19 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv): # We need to wait here because it's possible that we don't have access to # the latest WAL yet, when the `timeline_detail` API is first called. # See: https://github.com/neondatabase/neon/issues/1768. - lsn = wait_until(number_of_iterations=5, - interval=1, - func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, None)) + lsn = wait_until( + number_of_iterations=5, + interval=1, + func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, None), + ) # Make a DB modification then expect getting a new WAL receiver's data. pg.safe_psql("CREATE TABLE t(key int primary key, value text)") - wait_until(number_of_iterations=5, - interval=1, - func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, lsn)) + wait_until( + number_of_iterations=5, + interval=1, + func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, lsn), + ) def test_pageserver_http_api_client(neon_simple_env: NeonEnv): diff --git a/test_runner/batch_others/test_pageserver_catchup.py b/test_runner/batch_others/test_pageserver_catchup.py index dd24351e17..cba3203591 100644 --- a/test_runner/batch_others/test_pageserver_catchup.py +++ b/test_runner/batch_others/test_pageserver_catchup.py @@ -9,24 +9,27 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder) neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_pageserver_catchup_while_compute_down') + env.neon_cli.create_branch("test_pageserver_catchup_while_compute_down") # Make shared_buffers large to ensure we won't query pageserver while it is down. - pg = env.postgres.create_start('test_pageserver_catchup_while_compute_down', - config_lines=['shared_buffers=512MB']) + pg = env.postgres.create_start( + "test_pageserver_catchup_while_compute_down", config_lines=["shared_buffers=512MB"] + ) pg_conn = pg.connect() cur = pg_conn.cursor() # Create table, and insert some rows. - cur.execute('CREATE TABLE foo (t text)') - cur.execute(''' + cur.execute("CREATE TABLE foo (t text)") + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 10000) g - ''') + """ + ) cur.execute("SELECT count(*) FROM foo") - assert cur.fetchone() == (10000, ) + assert cur.fetchone() == (10000,) # Stop and restart pageserver. This is a more or less graceful shutdown, although # the page server doesn't currently have a shutdown routine so there's no difference @@ -35,11 +38,13 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder) # insert some more rows # since pageserver is shut down, these will be only on safekeepers - cur.execute(''' + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 10000) g - ''') + """ + ) # stop safekeepers gracefully env.safekeepers[0].stop() @@ -54,11 +59,11 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder) env.safekeepers[2].start() # restart compute node - pg.stop_and_destroy().create_start('test_pageserver_catchup_while_compute_down') + pg.stop_and_destroy().create_start("test_pageserver_catchup_while_compute_down") # Ensure that basebackup went correct and pageserver returned all data pg_conn = pg.connect() cur = pg_conn.cursor() cur.execute("SELECT count(*) FROM foo") - assert cur.fetchone() == (20000, ) + assert cur.fetchone() == (20000,) diff --git a/test_runner/batch_others/test_pageserver_restart.py b/test_runner/batch_others/test_pageserver_restart.py index c656469cb7..e2bd8be9b7 100644 --- a/test_runner/batch_others/test_pageserver_restart.py +++ b/test_runner/batch_others/test_pageserver_restart.py @@ -1,5 +1,5 @@ -from fixtures.neon_fixtures import NeonEnvBuilder from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder # Test restarting page server, while safekeeper and compute node keep @@ -7,8 +7,8 @@ from fixtures.log_helper import log def test_pageserver_restart(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_pageserver_restart') - pg = env.postgres.create_start('test_pageserver_restart') + env.neon_cli.create_branch("test_pageserver_restart") + pg = env.postgres.create_start("test_pageserver_restart") pg_conn = pg.connect() cur = pg_conn.cursor() @@ -17,18 +17,22 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder): # shared_buffers, otherwise the SELECT after restart will just return answer # from shared_buffers without hitting the page server, which defeats the point # of this test. - cur.execute('CREATE TABLE foo (t text)') - cur.execute(''' + cur.execute("CREATE TABLE foo (t text)") + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g - ''') + """ + ) # Verify that the table is larger than shared_buffers - cur.execute(''' + cur.execute( + """ select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_ize from pg_settings where name = 'shared_buffers' - ''') + """ + ) row = cur.fetchone() assert row is not None log.info(f"shared_buffers is {row[0]}, table size {row[1]}") @@ -49,7 +53,7 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder): cur = pg_conn.cursor() cur.execute("SELECT count(*) FROM foo") - assert cur.fetchone() == (100000, ) + assert cur.fetchone() == (100000,) # Stop the page server by force, and restart it env.pageserver.stop() diff --git a/test_runner/batch_others/test_parallel_copy.py b/test_runner/batch_others/test_parallel_copy.py index 55947fe427..6b7fe4fdda 100644 --- a/test_runner/batch_others/test_parallel_copy.py +++ b/test_runner/batch_others/test_parallel_copy.py @@ -1,7 +1,8 @@ -from io import BytesIO import asyncio -from fixtures.neon_fixtures import NeonEnv, Postgres +from io import BytesIO + from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, Postgres async def repeat_bytes(buf, repetitions: int): @@ -13,7 +14,8 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str) buf = BytesIO() for i in range(1000): buf.write( - f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()) + f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode() + ) buf.seek(0) copy_input = repeat_bytes(buf.read(), 5000) @@ -30,7 +32,7 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str) async def parallel_load_same_table(pg: Postgres, n_parallel: int): workers = [] for worker_id in range(n_parallel): - worker = copy_test_data_to_table(pg, worker_id, f'copytest') + worker = copy_test_data_to_table(pg, worker_id, f"copytest") workers.append(asyncio.create_task(worker)) # await all workers @@ -41,13 +43,13 @@ async def parallel_load_same_table(pg: Postgres, n_parallel: int): def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5): env = neon_simple_env env.neon_cli.create_branch("test_parallel_copy", "empty") - pg = env.postgres.create_start('test_parallel_copy') + pg = env.postgres.create_start("test_parallel_copy") log.info("postgres is running on 'test_parallel_copy' branch") # Create test table conn = pg.connect() cur = conn.cursor() - cur.execute(f'CREATE TABLE copytest (i int, t text)') + cur.execute(f"CREATE TABLE copytest (i int, t text)") # Run COPY TO to load the table with parallel connections. asyncio.run(parallel_load_same_table(pg, n_parallel)) diff --git a/test_runner/batch_others/test_pitr_gc.py b/test_runner/batch_others/test_pitr_gc.py index d63fc4b584..1fc18ebbc4 100644 --- a/test_runner/batch_others/test_pitr_gc.py +++ b/test_runner/batch_others/test_pitr_gc.py @@ -2,8 +2,8 @@ from contextlib import closing import psycopg2.extras from fixtures.log_helper import log -from fixtures.utils import print_gc_result, query_scalar from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.utils import print_gc_result, query_scalar # @@ -14,10 +14,12 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 1 # Set pitr interval such that we need to keep the data - neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '1 day', gc_horizon = 0}" + neon_env_builder.pageserver_config_override = ( + "tenant_config={pitr_interval = '1 day', gc_horizon = 0}" + ) env = neon_env_builder.init_start() - pgmain = env.postgres.create_start('main') + pgmain = env.postgres.create_start("main") log.info("postgres is running on 'main' branch") main_pg_conn = pgmain.connect() @@ -25,30 +27,32 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder): timeline = query_scalar(main_cur, "SHOW neon.timeline_id") # Create table - main_cur.execute('CREATE TABLE foo (t text)') + main_cur.execute("CREATE TABLE foo (t text)") for i in range(10000): - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space'; - ''') + """ + ) if i == 99: # keep some early lsn to test branch creation after GC - main_cur.execute('SELECT pg_current_wal_insert_lsn(), txid_current()') + main_cur.execute("SELECT pg_current_wal_insert_lsn(), txid_current()") res = main_cur.fetchone() assert res is not None lsn_a = res[0] xid_a = res[1] - log.info(f'LSN after 100 rows: {lsn_a} xid {xid_a}') + log.info(f"LSN after 100 rows: {lsn_a} xid {xid_a}") - main_cur.execute('SELECT pg_current_wal_insert_lsn(), txid_current()') + main_cur.execute("SELECT pg_current_wal_insert_lsn(), txid_current()") res = main_cur.fetchone() assert res is not None debug_lsn = res[0] debug_xid = res[1] - log.info(f'LSN after 10000 rows: {debug_lsn} xid {debug_xid}') + log.info(f"LSN after 10000 rows: {debug_lsn} xid {debug_xid}") # run GC with closing(env.pageserver.connect()) as psconn: @@ -61,16 +65,16 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder): # Branch at the point where only 100 rows were inserted # It must have been preserved by PITR setting - env.neon_cli.create_branch('test_pitr_gc_hundred', 'main', ancestor_start_lsn=lsn_a) + env.neon_cli.create_branch("test_pitr_gc_hundred", "main", ancestor_start_lsn=lsn_a) - pg_hundred = env.postgres.create_start('test_pitr_gc_hundred') + pg_hundred = env.postgres.create_start("test_pitr_gc_hundred") # On the 'hundred' branch, we should see only 100 rows hundred_pg_conn = pg_hundred.connect() hundred_cur = hundred_pg_conn.cursor() - hundred_cur.execute('SELECT count(*) FROM foo') - assert hundred_cur.fetchone() == (100, ) + hundred_cur.execute("SELECT count(*) FROM foo") + assert hundred_cur.fetchone() == (100,) # All the rows are visible on the main branch - main_cur.execute('SELECT count(*) FROM foo') - assert main_cur.fetchone() == (10000, ) + main_cur.execute("SELECT count(*) FROM foo") + assert main_cur.fetchone() == (10000,) diff --git a/test_runner/batch_others/test_proxy.py b/test_runner/batch_others/test_proxy.py index 2d9957fc38..dcff177044 100644 --- a/test_runner/batch_others/test_proxy.py +++ b/test_runner/batch_others/test_proxy.py @@ -1,25 +1,26 @@ -import pytest import psycopg2 +import pytest def test_proxy_select_1(static_proxy): - static_proxy.safe_psql('select 1', options='project=generic-project-name') + static_proxy.safe_psql("select 1", options="project=generic-project-name") def test_password_hack(static_proxy): - user = 'borat' - password = 'password' - static_proxy.safe_psql(f"create role {user} with login password '{password}'", - options='project=irrelevant') + user = "borat" + password = "password" + static_proxy.safe_psql( + f"create role {user} with login password '{password}'", options="project=irrelevant" + ) # Note the format of `magic`! magic = f"project=irrelevant;{password}" - static_proxy.safe_psql('select 1', sslsni=0, user=user, password=magic) + static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic) # Must also check that invalid magic won't be accepted. with pytest.raises(psycopg2.errors.OperationalError): magic = "broken" - static_proxy.safe_psql('select 1', sslsni=0, user=user, password=magic) + static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic) # Pass extra options to the server. @@ -28,8 +29,8 @@ def test_password_hack(static_proxy): # See https://github.com/neondatabase/neon/issues/1287 @pytest.mark.xfail def test_proxy_options(static_proxy): - with static_proxy.connect(options='-cproxytest.option=value') as conn: + with static_proxy.connect(options="-cproxytest.option=value") as conn: with conn.cursor() as cur: - cur.execute('SHOW proxytest.option') + cur.execute("SHOW proxytest.option") value = cur.fetchall()[0][0] - assert value == 'value' + assert value == "value" diff --git a/test_runner/batch_others/test_read_validation.py b/test_runner/batch_others/test_read_validation.py index 4be7af4c10..beaae0351b 100644 --- a/test_runner/batch_others/test_read_validation.py +++ b/test_runner/batch_others/test_read_validation.py @@ -1,14 +1,11 @@ from contextlib import closing -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log - -from psycopg2.errors import UndefinedTable -from psycopg2.errors import IoError - +from fixtures.neon_fixtures import NeonEnv from fixtures.utils import query_scalar +from psycopg2.errors import IoError, UndefinedTable -pytest_plugins = ("fixtures.neon_fixtures") +pytest_plugins = "fixtures.neon_fixtures" extensions = ["pageinspect", "neon_test_utils", "pg_buffercache"] @@ -47,13 +44,15 @@ def test_read_validation(neon_simple_env: NeonEnv): log.info("Test table is populated, validating buffer cache") cache_entries = query_scalar( - c, - "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode)) + c, "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode) + ) assert cache_entries > 0, "No buffers cached for the test relation" c.execute( - "select reltablespace, reldatabase, relfilenode from pg_buffercache where relfilenode = {}" - .format(relfilenode)) + "select reltablespace, reldatabase, relfilenode from pg_buffercache where relfilenode = {}".format( + relfilenode + ) + ) reln = c.fetchone() assert reln is not None @@ -62,21 +61,23 @@ def test_read_validation(neon_simple_env: NeonEnv): c.execute("select clear_buffer_cache()") cache_entries = query_scalar( - c, - "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode)) + c, "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode) + ) assert cache_entries == 0, "Failed to clear buffer cache" log.info("Cache is clear, reading stale page version") c.execute( - "select lsn, lower, upper from page_header(get_raw_page_at_lsn('foo', 'main', 0, '{}'))" - .format(first[0])) + "select lsn, lower, upper from page_header(get_raw_page_at_lsn('foo', 'main', 0, '{}'))".format( + first[0] + ) + ) direct_first = c.fetchone() assert first == direct_first, "Failed fetch page at historic lsn" cache_entries = query_scalar( - c, - "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode)) + c, "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode) + ) assert cache_entries == 0, "relation buffers detected after invalidation" log.info("Cache is clear, reading latest page version without cache") @@ -88,8 +89,8 @@ def test_read_validation(neon_simple_env: NeonEnv): assert second == direct_latest, "Failed fetch page at latest lsn" cache_entries = query_scalar( - c, - "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode)) + c, "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode) + ) assert cache_entries == 0, "relation buffers detected after invalidation" log.info( @@ -97,8 +98,10 @@ def test_read_validation(neon_simple_env: NeonEnv): ) c.execute( - "select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, '{}' ))" - .format(reln[0], reln[1], reln[2], first[0])) + "select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, '{}' ))".format( + reln[0], reln[1], reln[2], first[0] + ) + ) direct_first = c.fetchone() assert first == direct_first, "Failed fetch page at historic lsn using oid" @@ -107,20 +110,24 @@ def test_read_validation(neon_simple_env: NeonEnv): ) c.execute( - "select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, NULL ))" - .format(reln[0], reln[1], reln[2])) + "select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, NULL ))".format( + reln[0], reln[1], reln[2] + ) + ) direct_latest = c.fetchone() assert second == direct_latest, "Failed fetch page at latest lsn" - c.execute('drop table foo;') + c.execute("drop table foo;") log.info( "Relation dropped, attempting reading stale page version without cache using relation identifiers" ) c.execute( - "select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, '{}' ))" - .format(reln[0], reln[1], reln[2], first[0])) + "select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, '{}' ))".format( + reln[0], reln[1], reln[2], first[0] + ) + ) direct_first = c.fetchone() assert first == direct_first, "Failed fetch page at historic lsn using oid" diff --git a/test_runner/batch_others/test_readonly_node.py b/test_runner/batch_others/test_readonly_node.py index 82fc6329cf..0bd78c62a3 100644 --- a/test_runner/batch_others/test_readonly_node.py +++ b/test_runner/batch_others/test_readonly_node.py @@ -12,81 +12,87 @@ from fixtures.utils import query_scalar # def test_readonly_node(neon_simple_env: NeonEnv): env = neon_simple_env - env.neon_cli.create_branch('test_readonly_node', 'empty') - pgmain = env.postgres.create_start('test_readonly_node') + env.neon_cli.create_branch("test_readonly_node", "empty") + pgmain = env.postgres.create_start("test_readonly_node") log.info("postgres is running on 'test_readonly_node' branch") main_pg_conn = pgmain.connect() main_cur = main_pg_conn.cursor() # Create table, and insert the first 100 rows - main_cur.execute('CREATE TABLE foo (t text)') + main_cur.execute("CREATE TABLE foo (t text)") - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100) g - ''') - main_cur.execute('SELECT pg_current_wal_insert_lsn()') - lsn_a = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info('LSN after 100 rows: ' + lsn_a) + """ + ) + main_cur.execute("SELECT pg_current_wal_insert_lsn()") + lsn_a = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info("LSN after 100 rows: " + lsn_a) # Insert some more rows. (This generates enough WAL to fill a few segments.) - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 200000) g - ''') - lsn_b = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info('LSN after 200100 rows: ' + lsn_b) + """ + ) + lsn_b = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info("LSN after 200100 rows: " + lsn_b) # Insert many more rows. This generates enough WAL to fill a few segments. - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 200000) g - ''') + """ + ) - lsn_c = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info('LSN after 400100 rows: ' + lsn_c) + lsn_c = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info("LSN after 400100 rows: " + lsn_c) # Create first read-only node at the point where only 100 rows were inserted - pg_hundred = env.postgres.create_start(branch_name='test_readonly_node', - node_name='test_readonly_node_hundred', - lsn=lsn_a) + pg_hundred = env.postgres.create_start( + branch_name="test_readonly_node", node_name="test_readonly_node_hundred", lsn=lsn_a + ) # And another at the point where 200100 rows were inserted - pg_more = env.postgres.create_start(branch_name='test_readonly_node', - node_name='test_readonly_node_more', - lsn=lsn_b) + pg_more = env.postgres.create_start( + branch_name="test_readonly_node", node_name="test_readonly_node_more", lsn=lsn_b + ) # On the 'hundred' node, we should see only 100 rows hundred_pg_conn = pg_hundred.connect() hundred_cur = hundred_pg_conn.cursor() - hundred_cur.execute('SELECT count(*) FROM foo') - assert hundred_cur.fetchone() == (100, ) + hundred_cur.execute("SELECT count(*) FROM foo") + assert hundred_cur.fetchone() == (100,) # On the 'more' node, we should see 100200 rows more_pg_conn = pg_more.connect() more_cur = more_pg_conn.cursor() - more_cur.execute('SELECT count(*) FROM foo') - assert more_cur.fetchone() == (200100, ) + more_cur.execute("SELECT count(*) FROM foo") + assert more_cur.fetchone() == (200100,) # All the rows are visible on the main branch - main_cur.execute('SELECT count(*) FROM foo') - assert main_cur.fetchone() == (400100, ) + main_cur.execute("SELECT count(*) FROM foo") + assert main_cur.fetchone() == (400100,) # Check creating a node at segment boundary - pg = env.postgres.create_start(branch_name='test_readonly_node', - node_name='test_branch_segment_boundary', - lsn='0/3000000') + pg = env.postgres.create_start( + branch_name="test_readonly_node", node_name="test_branch_segment_boundary", lsn="0/3000000" + ) cur = pg.connect().cursor() - cur.execute('SELECT 1') - assert cur.fetchone() == (1, ) + cur.execute("SELECT 1") + assert cur.fetchone() == (1,) # Create node at pre-initdb lsn with pytest.raises(Exception, match="invalid basebackup lsn"): # compute node startup with invalid LSN should fail - env.postgres.create_start(branch_name='test_readonly_node', - node_name='test_readonly_node_preinitdb', - lsn='0/42') + env.postgres.create_start( + branch_name="test_readonly_node", node_name="test_readonly_node_preinitdb", lsn="0/42" + ) diff --git a/test_runner/batch_others/test_recovery.py b/test_runner/batch_others/test_recovery.py index 5ba783b802..5220aa6c2e 100644 --- a/test_runner/batch_others/test_recovery.py +++ b/test_runner/batch_others/test_recovery.py @@ -1,11 +1,12 @@ +import json import os import time -import psycopg2.extras -import json from ast import Assert from contextlib import closing -from fixtures.neon_fixtures import NeonEnvBuilder + +import psycopg2.extras from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder # @@ -21,13 +22,15 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): # Check if failpoints enables. Otherwise the test doesn't make sense f = env.neon_cli.pageserver_enabled_features() - assert "failpoints" in f["features"], "Build pageserver with --features=failpoints option to run this test" + assert ( + "failpoints" in f["features"] + ), "Build pageserver with --features=failpoints option to run this test" neon_env_builder.start() # Create a branch for us env.neon_cli.create_branch("test_pageserver_recovery", "main") - pg = env.postgres.create_start('test_pageserver_recovery') + pg = env.postgres.create_start("test_pageserver_recovery") log.info("postgres is running on 'test_pageserver_recovery' branch") connstr = pg.connstr() @@ -62,4 +65,4 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): with closing(pg.connect()) as conn: with conn.cursor() as cur: cur.execute("select count(*) from foo") - assert cur.fetchone() == (100000, ) + assert cur.fetchone() == (100000,) diff --git a/test_runner/batch_others/test_remote_storage.py b/test_runner/batch_others/test_remote_storage.py index ca46010dca..974d3402f6 100644 --- a/test_runner/batch_others/test_remote_storage.py +++ b/test_runner/batch_others/test_remote_storage.py @@ -1,14 +1,24 @@ # It's possible to run any regular test with the local fs remote storage via # env ZENITH_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/neon_zzz/'}" poetry ...... -import shutil, os -from pathlib import Path +import os +import shutil import time +from pathlib import Path from uuid import UUID -from fixtures.neon_fixtures import NeonEnvBuilder, RemoteStorageKind, assert_timeline_local, available_remote_storages, wait_until, wait_for_last_record_lsn, wait_for_upload -from fixtures.log_helper import log -from fixtures.utils import lsn_from_hex, query_scalar + import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + NeonEnvBuilder, + RemoteStorageKind, + assert_timeline_local, + available_remote_storages, + wait_for_last_record_lsn, + wait_for_upload, + wait_until, +) +from fixtures.utils import lsn_from_hex, query_scalar # @@ -28,7 +38,7 @@ import pytest # * queries the specific data, ensuring that it matches the one stored before # # The tests are done for all types of remote storage pageserver supports. -@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages()) +@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages()) def test_remote_storage_backup_and_restore( neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind, @@ -39,15 +49,15 @@ def test_remote_storage_backup_and_restore( neon_env_builder.enable_remote_storage( remote_storage_kind=remote_storatge_kind, - test_name='test_remote_storage_backup_and_restore', + test_name="test_remote_storage_backup_and_restore", ) data_id = 1 - data_secret = 'very secret secret' + data_secret = "very secret secret" ##### First start, insert secret data and upload it to the remote storage env = neon_env_builder.init_start() - pg = env.postgres.create_start('main') + pg = env.postgres.create_start("main") client = env.pageserver.http_client() @@ -58,10 +68,12 @@ def test_remote_storage_backup_and_restore( for checkpoint_number in checkpoint_numbers: with pg.cursor() as cur: - cur.execute(f''' + cur.execute( + f""" CREATE TABLE t{checkpoint_number}(id int primary key, secret text); INSERT INTO t{checkpoint_number} VALUES ({data_id}, '{data_secret}|{checkpoint_number}'); - ''') + """ + ) current_lsn = lsn_from_hex(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) # wait until pageserver receives that data @@ -70,16 +82,16 @@ def test_remote_storage_backup_and_restore( # run checkpoint manually to be sure that data landed in remote storage env.pageserver.safe_psql(f"checkpoint {tenant_id} {timeline_id}") - log.info(f'waiting for checkpoint {checkpoint_number} upload') + log.info(f"waiting for checkpoint {checkpoint_number} upload") # wait until pageserver successfully uploaded a checkpoint to remote storage wait_for_upload(client, UUID(tenant_id), UUID(timeline_id), current_lsn) - log.info(f'upload of checkpoint {checkpoint_number} is done') + log.info(f"upload of checkpoint {checkpoint_number} is done") ##### Stop the first pageserver instance, erase all its data env.postgres.stop_all() env.pageserver.stop() - dir_to_clear = Path(env.repo_dir) / 'tenants' + dir_to_clear = Path(env.repo_dir) / "tenants" shutil.rmtree(dir_to_clear) os.mkdir(dir_to_clear) @@ -100,8 +112,8 @@ def test_remote_storage_backup_and_restore( detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id)) log.info("Timeline detail with active failpoint: %s", detail) - assert detail['local'] is None - assert detail['remote']['awaits_download'] + assert detail["local"] is None + assert detail["remote"]["awaits_download"] # trigger temporary download files removal env.pageserver.stop() @@ -110,19 +122,24 @@ def test_remote_storage_backup_and_restore( client.tenant_attach(UUID(tenant_id)) log.info("waiting for timeline redownload") - wait_until(number_of_iterations=20, - interval=1, - func=lambda: assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id))) + wait_until( + number_of_iterations=20, + interval=1, + func=lambda: assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id)), + ) detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id)) - assert detail['local'] is not None + assert detail["local"] is not None log.info("Timeline detail after attach completed: %s", detail) - assert lsn_from_hex(detail['local']['last_record_lsn']) >= current_lsn, 'current db Lsn should should not be less than the one stored on remote storage' - assert not detail['remote']['awaits_download'] + assert ( + lsn_from_hex(detail["local"]["last_record_lsn"]) >= current_lsn + ), "current db Lsn should should not be less than the one stored on remote storage" + assert not detail["remote"]["awaits_download"] - pg = env.postgres.create_start('main') + pg = env.postgres.create_start("main") with pg.cursor() as cur: for checkpoint_number in checkpoint_numbers: - assert query_scalar(cur, - f'SELECT secret FROM t{checkpoint_number} WHERE id = {data_id};' - ) == f'{data_secret}|{checkpoint_number}' + assert ( + query_scalar(cur, f"SELECT secret FROM t{checkpoint_number} WHERE id = {data_id};") + == f"{data_secret}|{checkpoint_number}" + ) diff --git a/test_runner/batch_others/test_subxacts.py b/test_runner/batch_others/test_subxacts.py index d06877825e..42234bf535 100644 --- a/test_runner/batch_others/test_subxacts.py +++ b/test_runner/batch_others/test_subxacts.py @@ -1,5 +1,5 @@ -from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content # Test subtransactions @@ -11,28 +11,30 @@ from fixtures.log_helper import log def test_subxacts(neon_simple_env: NeonEnv, test_output_dir): env = neon_simple_env env.neon_cli.create_branch("test_subxacts", "empty") - pg = env.postgres.create_start('test_subxacts') + pg = env.postgres.create_start("test_subxacts") log.info("postgres is running on 'test_subxacts' branch") pg_conn = pg.connect() cur = pg_conn.cursor() - cur.execute(''' + cur.execute( + """ CREATE TABLE t1(i int, j int); - ''') + """ + ) - cur.execute('select pg_switch_wal();') + cur.execute("select pg_switch_wal();") # Issue 100 transactions, with 1000 subtransactions in each. for i in range(100): - cur.execute('begin') + cur.execute("begin") for j in range(1000): - cur.execute(f'savepoint sp{j}') - cur.execute(f'insert into t1 values ({i}, {j})') - cur.execute('commit') + cur.execute(f"savepoint sp{j}") + cur.execute(f"insert into t1 values ({i}, {j})") + cur.execute("commit") # force wal flush - cur.execute('checkpoint') + cur.execute("checkpoint") # Check that we can restore the content of the datadir correctly check_restored_datadir_content(test_output_dir, env, pg) diff --git a/test_runner/batch_others/test_tenant_conf.py b/test_runner/batch_others/test_tenant_conf.py index d25aad742e..1e09ae8db7 100644 --- a/test_runner/batch_others/test_tenant_conf.py +++ b/test_runner/batch_others/test_tenant_conf.py @@ -1,27 +1,28 @@ from contextlib import closing -import pytest import psycopg2.extras - -from fixtures.neon_fixtures import NeonEnvBuilder +import pytest from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder def test_tenant_config(neon_env_builder: NeonEnvBuilder): # set some non-default global config - neon_env_builder.pageserver_config_override = ''' + neon_env_builder.pageserver_config_override = """ page_cache_size=444; wait_lsn_timeout='111 s'; -tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' +tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}""" env = neon_env_builder.init_start() """Test per tenant configuration""" - tenant, _ = env.neon_cli.create_tenant(conf={ - 'checkpoint_distance': '20000', - 'gc_period': '30sec', - }) + tenant, _ = env.neon_cli.create_tenant( + conf={ + "checkpoint_distance": "20000", + "gc_period": "30sec", + } + ) - env.neon_cli.create_timeline(f'test_tenant_conf', tenant_id=tenant) + env.neon_cli.create_timeline(f"test_tenant_conf", tenant_id=tenant) pg = env.postgres.create_start( "test_tenant_conf", "main", @@ -36,7 +37,8 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' pscur.execute(f"show {env.initial_tenant.hex}") res = pscur.fetchone() assert all( - i in res.items() for i in { + i in res.items() + for i in { "checkpoint_distance": 10000, "compaction_target_size": 1048576, "compaction_period": 1, @@ -44,8 +46,9 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' "gc_horizon": 67108864, "gc_period": 100, "image_creation_threshold": 3, - "pitr_interval": 2592000 - }.items()) + "pitr_interval": 2592000, + }.items() + ) # check the configuration of the new tenant with closing(env.pageserver.connect()) as psconn: @@ -54,7 +57,8 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' res = pscur.fetchone() log.info(f"res: {res}") assert all( - i in res.items() for i in { + i in res.items() + for i in { "checkpoint_distance": 20000, "compaction_target_size": 1048576, "compaction_period": 1, @@ -62,15 +66,18 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' "gc_horizon": 67108864, "gc_period": 30, "image_creation_threshold": 3, - "pitr_interval": 2592000 - }.items()) + "pitr_interval": 2592000, + }.items() + ) # update the config and ensure that it has changed - env.neon_cli.config_tenant(tenant_id=tenant, - conf={ - 'checkpoint_distance': '15000', - 'gc_period': '80sec', - }) + env.neon_cli.config_tenant( + tenant_id=tenant, + conf={ + "checkpoint_distance": "15000", + "gc_period": "80sec", + }, + ) with closing(env.pageserver.connect()) as psconn: with psconn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as pscur: @@ -78,7 +85,8 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' res = pscur.fetchone() log.info(f"after config res: {res}") assert all( - i in res.items() for i in { + i in res.items() + for i in { "checkpoint_distance": 15000, "compaction_target_size": 1048576, "compaction_period": 1, @@ -86,8 +94,9 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' "gc_horizon": 67108864, "gc_period": 80, "image_creation_threshold": 3, - "pitr_interval": 2592000 - }.items()) + "pitr_interval": 2592000, + }.items() + ) # restart the pageserver and ensure that the config is still correct env.pageserver.stop() @@ -99,7 +108,8 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' res = pscur.fetchone() log.info(f"after restart res: {res}") assert all( - i in res.items() for i in { + i in res.items() + for i in { "checkpoint_distance": 15000, "compaction_target_size": 1048576, "compaction_period": 1, @@ -107,5 +117,6 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' "gc_horizon": 67108864, "gc_period": 80, "image_creation_threshold": 3, - "pitr_interval": 2592000 - }.items()) + "pitr_interval": 2592000, + }.items() + ) diff --git a/test_runner/batch_others/test_tenant_detach.py b/test_runner/batch_others/test_tenant_detach.py index afc4f89bbf..f1b30429bf 100644 --- a/test_runner/batch_others/test_tenant_detach.py +++ b/test_runner/batch_others/test_tenant_detach.py @@ -1,9 +1,9 @@ +import uuid from threading import Thread from uuid import uuid4 -import uuid + import psycopg2 import pytest - from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException @@ -11,7 +11,7 @@ from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiExc def do_gc_target(env: NeonEnv, tenant_id: uuid.UUID, timeline_id: uuid.UUID): """Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211""" try: - env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0') + env.pageserver.safe_psql(f"do_gc {tenant_id.hex} {timeline_id.hex} 0") except Exception as e: log.error("do_gc failed: %s", e) @@ -22,8 +22,10 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): # first check for non existing tenant tenant_id = uuid4() - with pytest.raises(expected_exception=NeonPageserverApiException, - match=f'Tenant not found for id {tenant_id.hex}'): + with pytest.raises( + expected_exception=NeonPageserverApiException, + match=f"Tenant not found for id {tenant_id.hex}", + ): pageserver_http.tenant_detach(tenant_id) # create new nenant @@ -32,17 +34,20 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): # assert tenant exists on disk assert (env.repo_dir / "tenants" / tenant_id.hex).exists() - pg = env.postgres.create_start('main', tenant_id=tenant_id) + pg = env.postgres.create_start("main", tenant_id=tenant_id) # we rely upon autocommit after each statement - pg.safe_psql_many(queries=[ - 'CREATE TABLE t(key int primary key, value text)', - 'INSERT INTO t SELECT generate_series(1,100000), \'payload\'', - ]) + pg.safe_psql_many( + queries=[ + "CREATE TABLE t(key int primary key, value text)", + "INSERT INTO t SELECT generate_series(1,100000), 'payload'", + ] + ) # gc should not try to even start - with pytest.raises(expected_exception=psycopg2.DatabaseError, - match='gc target timeline does not exist'): - env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {uuid4().hex} 0') + with pytest.raises( + expected_exception=psycopg2.DatabaseError, match="gc target timeline does not exist" + ): + env.pageserver.safe_psql(f"do_gc {tenant_id.hex} {uuid4().hex} 0") # try to concurrently run gc and detach gc_thread = Thread(target=lambda: do_gc_target(env, tenant_id, timeline_id)) @@ -67,6 +72,7 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): # check that nothing is left on disk for deleted tenant assert not (env.repo_dir / "tenants" / tenant_id.hex).exists() - with pytest.raises(expected_exception=psycopg2.DatabaseError, - match=f'Tenant {tenant_id.hex} not found'): - env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0') + with pytest.raises( + expected_exception=psycopg2.DatabaseError, match=f"Tenant {tenant_id.hex} not found" + ): + env.pageserver.safe_psql(f"do_gc {tenant_id.hex} {timeline_id.hex} 0") diff --git a/test_runner/batch_others/test_tenant_relocation.py b/test_runner/batch_others/test_tenant_relocation.py index eb65e2e3b5..a30804ee8e 100644 --- a/test_runner/batch_others/test_tenant_relocation.py +++ b/test_runner/batch_others/test_tenant_relocation.py @@ -34,12 +34,14 @@ def assert_abs_margin_ratio(a: float, b: float, margin_ratio: float): @contextmanager -def new_pageserver_helper(new_pageserver_dir: pathlib.Path, - pageserver_bin: pathlib.Path, - remote_storage_mock_path: pathlib.Path, - pg_port: int, - http_port: int, - broker: Optional[Etcd]): +def new_pageserver_helper( + new_pageserver_dir: pathlib.Path, + pageserver_bin: pathlib.Path, + remote_storage_mock_path: pathlib.Path, + pg_port: int, + http_port: int, + broker: Optional[Etcd], +): """ cannot use NeonPageserver yet because it depends on neon cli which currently lacks support for multiple pageservers @@ -47,10 +49,10 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path, # actually run new pageserver cmd = [ str(pageserver_bin), - '--workdir', + "--workdir", str(new_pageserver_dir), - '--daemonize', - '--update-config', + "--daemonize", + "--update-config", f"-c listen_pg_addr='localhost:{pg_port}'", f"-c listen_http_addr='localhost:{http_port}'", f"-c pg_distrib_dir='{pg_distrib_dir}'", @@ -58,7 +60,9 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path, f"-c remote_storage={{local_path='{remote_storage_mock_path}'}}", ] if broker is not None: - cmd.append(f"-c broker_endpoints=['{broker.client_url()}']", ) + cmd.append( + f"-c broker_endpoints=['{broker.client_url()}']", + ) log.info("starting new pageserver %s", cmd) out = subprocess.check_output(cmd, text=True) @@ -67,7 +71,7 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path, yield finally: log.info("stopping new pageserver") - pid = int((new_pageserver_dir / 'pageserver.pid').read_text()) + pid = int((new_pageserver_dir / "pageserver.pid").read_text()) os.kill(pid, signal.SIGQUIT) @@ -105,7 +109,7 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve log.info("successfully recovered %s", inserted_ctr) failed = False load_ok_event.set() - log.info('load thread stopped') + log.info("load thread stopped") def populate_branch( @@ -123,8 +127,10 @@ def populate_branch( cur.execute("SELECT pg_current_wal_flush_lsn()") log.info("pg_current_wal_flush_lsn() %s", lsn_from_hex(cur.fetchone()[0])) - log.info("timeline detail %s", - ps_http.timeline_detail(tenant_id=tenant_id, timeline_id=timeline_id)) + log.info( + "timeline detail %s", + ps_http.timeline_detail(tenant_id=tenant_id, timeline_id=timeline_id), + ) # we rely upon autocommit after each statement # as waiting for acceptors happens there @@ -133,7 +139,7 @@ def populate_branch( cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'some payload'") if expected_sum is not None: cur.execute("SELECT sum(key) FROM t") - assert cur.fetchone() == (expected_sum, ) + assert cur.fetchone() == (expected_sum,) cur.execute("SELECT pg_current_wal_flush_lsn()") current_lsn = lsn_from_hex(cur.fetchone()[0]) @@ -166,34 +172,41 @@ def check_timeline_attached( # when load is active these checks can break because lsns are not static # so lets check with some margin - assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']), - lsn_from_hex(old_timeline_detail['local']['disk_consistent_lsn']), - 0.03) + assert_abs_margin_ratio( + lsn_from_hex(new_timeline_detail["local"]["disk_consistent_lsn"]), + lsn_from_hex(old_timeline_detail["local"]["disk_consistent_lsn"]), + 0.03, + ) - assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']), - old_current_lsn, - 0.03) + assert_abs_margin_ratio( + lsn_from_hex(new_timeline_detail["local"]["disk_consistent_lsn"]), old_current_lsn, 0.03 + ) -def switch_pg_to_new_pageserver(env: NeonEnv, - pg: Postgres, - new_pageserver_port: int, - tenant_id: UUID, - timeline_id: UUID) -> pathlib.Path: +def switch_pg_to_new_pageserver( + env: NeonEnv, pg: Postgres, new_pageserver_port: int, tenant_id: UUID, timeline_id: UUID +) -> pathlib.Path: pg.stop() pg_config_file_path = pathlib.Path(pg.config_file_path()) - pg_config_file_path.open('a').write( - f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_port}'") + pg_config_file_path.open("a").write( + f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_port}'" + ) pg.start() - timeline_to_detach_local_path = env.repo_dir / 'tenants' / tenant_id.hex / 'timelines' / timeline_id.hex + timeline_to_detach_local_path = ( + env.repo_dir / "tenants" / tenant_id.hex / "timelines" / timeline_id.hex + ) files_before_detach = os.listdir(timeline_to_detach_local_path) - assert 'metadata' in files_before_detach, f'Regular timeline {timeline_to_detach_local_path} should have the metadata file,\ - but got: {files_before_detach}' - assert len(files_before_detach) >= 2, f'Regular timeline {timeline_to_detach_local_path} should have at least one layer file,\ - but got {files_before_detach}' + assert ( + "metadata" in files_before_detach + ), f"Regular timeline {timeline_to_detach_local_path} should have the metadata file,\ + but got: {files_before_detach}" + assert ( + len(files_before_detach) >= 2 + ), f"Regular timeline {timeline_to_detach_local_path} should have at least one layer file,\ + but got {files_before_detach}" return timeline_to_detach_local_path @@ -202,39 +215,44 @@ def post_migration_check(pg: Postgres, sum_before_migration: int, old_local_path with pg_cur(pg) as cur: # check that data is still there cur.execute("SELECT sum(key) FROM t") - assert cur.fetchone() == (sum_before_migration, ) + assert cur.fetchone() == (sum_before_migration,) # check that we can write new data cur.execute("INSERT INTO t SELECT generate_series(1001,2000), 'some payload'") cur.execute("SELECT sum(key) FROM t") - assert cur.fetchone() == (sum_before_migration + 1500500, ) + assert cur.fetchone() == (sum_before_migration + 1500500,) - assert not os.path.exists(old_local_path), f'After detach, local timeline dir {old_local_path} should be removed' + assert not os.path.exists( + old_local_path + ), f"After detach, local timeline dir {old_local_path} should be removed" @pytest.mark.parametrize( - 'method', + "method", [ # A minor migration involves no storage breaking changes. # It is done by attaching the tenant to a new pageserver. - 'minor', + "minor", # A major migration involves exporting a postgres datadir # basebackup and importing it into the new pageserver. # This kind of migration can tolerate breaking changes # to storage format - 'major', - ]) -@pytest.mark.parametrize('with_load', ['with_load', 'without_load']) -def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, - port_distributor: PortDistributor, - test_output_dir, - method: str, - with_load: str): + "major", + ], +) +@pytest.mark.parametrize("with_load", ["with_load", "without_load"]) +def test_tenant_relocation( + neon_env_builder: NeonEnvBuilder, + port_distributor: PortDistributor, + test_output_dir, + method: str, + with_load: str, +): neon_env_builder.enable_local_fs_remote_storage() env = neon_env_builder.init_start() # create folder for remote storage mock - remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage' + remote_storage_mock_path = env.repo_dir / "local_fs_remote_storage" # we use two branches to check that they are both relocated # first branch is used for load, compute for second one is used to @@ -242,12 +260,15 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, pageserver_http = env.pageserver.http_client() - tenant_id, initial_timeline_id = env.neon_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209")) + tenant_id, initial_timeline_id = env.neon_cli.create_tenant( + UUID("74ee8b079a0e437eb0afea7d26a07209") + ) log.info("tenant to relocate %s initial_timeline_id %s", tenant_id, initial_timeline_id) env.neon_cli.create_branch("test_tenant_relocation_main", tenant_id=tenant_id) - pg_main = env.postgres.create_start(branch_name='test_tenant_relocation_main', - tenant_id=tenant_id) + pg_main = env.postgres.create_start( + branch_name="test_tenant_relocation_main", tenant_id=tenant_id + ) timeline_id_main, current_lsn_main = populate_branch( pg_main, @@ -263,8 +284,9 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, ancestor_start_lsn=lsn_to_hex(current_lsn_main), tenant_id=tenant_id, ) - pg_second = env.postgres.create_start(branch_name='test_tenant_relocation_second', - tenant_id=tenant_id) + pg_second = env.postgres.create_start( + branch_name="test_tenant_relocation_second", tenant_id=tenant_id + ) timeline_id_second, current_lsn_second = populate_branch( pg_second, @@ -281,7 +303,7 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_second, current_lsn_second) timeline_detail_second = assert_timeline_local(pageserver_http, tenant_id, timeline_id_second) - if with_load == 'with_load': + if with_load == "with_load": # create load table with pg_cur(pg_main) as cur: cur.execute("CREATE TABLE load(value text)") @@ -317,22 +339,24 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, log.info("inititalizing new pageserver") # bootstrap second pageserver - new_pageserver_dir = env.repo_dir / 'new_pageserver' + new_pageserver_dir = env.repo_dir / "new_pageserver" new_pageserver_dir.mkdir() new_pageserver_pg_port = port_distributor.get_port() new_pageserver_http_port = port_distributor.get_port() log.info("new pageserver ports pg %s http %s", new_pageserver_pg_port, new_pageserver_http_port) - pageserver_bin = pathlib.Path(neon_binpath) / 'pageserver' + pageserver_bin = pathlib.Path(neon_binpath) / "pageserver" new_pageserver_http = NeonPageserverHttpClient(port=new_pageserver_http_port, auth_token=None) - with new_pageserver_helper(new_pageserver_dir, - pageserver_bin, - remote_storage_mock_path, - new_pageserver_pg_port, - new_pageserver_http_port, - neon_env_builder.broker): + with new_pageserver_helper( + new_pageserver_dir, + pageserver_bin, + remote_storage_mock_path, + new_pageserver_pg_port, + new_pageserver_http_port, + neon_env_builder.broker, + ): # Migrate either by attaching from s3 or import/export basebackup if method == "major": @@ -367,13 +391,16 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, # check that it shows that download is in progress tenant_status = new_pageserver_http.tenant_status(tenant_id=tenant_id) - assert tenant_status.get('has_in_progress_downloads'), tenant_status + assert tenant_status.get("has_in_progress_downloads"), tenant_status # wait until tenant is downloaded - wait_until(number_of_iterations=10, - interval=1, - func=lambda: assert_no_in_progress_downloads_for_tenant( - new_pageserver_http, tenant_id)) + wait_until( + number_of_iterations=10, + interval=1, + func=lambda: assert_no_in_progress_downloads_for_tenant( + new_pageserver_http, tenant_id + ), + ) check_timeline_attached( new_pageserver_http, @@ -392,10 +419,10 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, ) # rewrite neon cli config to use new pageserver for basebackup to start new compute - cli_config_lines = (env.repo_dir / 'config').read_text().splitlines() + cli_config_lines = (env.repo_dir / "config").read_text().splitlines() cli_config_lines[-2] = f"listen_http_addr = 'localhost:{new_pageserver_http_port}'" cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'" - (env.repo_dir / 'config').write_text('\n'.join(cli_config_lines)) + (env.repo_dir / "config").write_text("\n".join(cli_config_lines)) old_local_path_main = switch_pg_to_new_pageserver( env, @@ -423,7 +450,8 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, # ensure that we can successfully read all relations on the new pageserver with pg_cur(pg_second) as cur: - cur.execute(''' + cur.execute( + """ DO $$ DECLARE r RECORD; @@ -435,18 +463,19 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, EXECUTE 'SELECT count(*) FROM quote_ident($1)' USING r.relname; END LOOP; END$$; - ''') + """ + ) - if with_load == 'with_load': + if with_load == "with_load": assert load_ok_event.wait(3) - log.info('stopping load thread') + log.info("stopping load thread") load_stop_event.set() load_thread.join(timeout=10) - log.info('load thread stopped') + log.info("load thread stopped") # bring old pageserver back for clean shutdown via neon cli # new pageserver will be shut down by the context manager - cli_config_lines = (env.repo_dir / 'config').read_text().splitlines() + cli_config_lines = (env.repo_dir / "config").read_text().splitlines() cli_config_lines[-2] = f"listen_http_addr = 'localhost:{env.pageserver.service_port.http}'" cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{env.pageserver.service_port.pg}'" - (env.repo_dir / 'config').write_text('\n'.join(cli_config_lines)) + (env.repo_dir / "config").write_text("\n".join(cli_config_lines)) diff --git a/test_runner/batch_others/test_tenant_tasks.py b/test_runner/batch_others/test_tenant_tasks.py index fae2a2199d..8075756ffb 100644 --- a/test_runner/batch_others/test_tenant_tasks.py +++ b/test_runner/batch_others/test_tenant_tasks.py @@ -1,6 +1,7 @@ -from fixtures.neon_fixtures import NeonEnvBuilder, wait_until -from uuid import UUID import time +from uuid import UUID + +from fixtures.neon_fixtures import NeonEnvBuilder, wait_until def get_only_element(l): @@ -47,7 +48,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder): tenant, _ = env.neon_cli.create_tenant() timeline = env.neon_cli.create_timeline(name, tenant_id=tenant) pg = env.postgres.create_start(name, tenant_id=tenant) - assert (get_state(tenant) == "Active") + assert get_state(tenant) == "Active" # Stop compute pg.stop() diff --git a/test_runner/batch_others/test_tenants.py b/test_runner/batch_others/test_tenants.py index 8d73d8185c..0e0cd44471 100644 --- a/test_runner/batch_others/test_tenants.py +++ b/test_runner/batch_others/test_tenants.py @@ -1,15 +1,15 @@ +import os from contextlib import closing from datetime import datetime -import os -import pytest -from fixtures.neon_fixtures import NeonEnvBuilder +import pytest from fixtures.log_helper import log from fixtures.metrics import parse_metrics +from fixtures.neon_fixtures import NeonEnvBuilder from fixtures.utils import lsn_to_hex -@pytest.mark.parametrize('with_safekeepers', [False, True]) +@pytest.mark.parametrize("with_safekeepers", [False, True]) def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool): if with_safekeepers: neon_env_builder.num_safekeepers = 3 @@ -19,17 +19,19 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers: tenant_1, _ = env.neon_cli.create_tenant() tenant_2, _ = env.neon_cli.create_tenant() - env.neon_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}', - tenant_id=tenant_1) - env.neon_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}', - tenant_id=tenant_2) + env.neon_cli.create_timeline( + f"test_tenants_normal_work_with_safekeepers{with_safekeepers}", tenant_id=tenant_1 + ) + env.neon_cli.create_timeline( + f"test_tenants_normal_work_with_safekeepers{with_safekeepers}", tenant_id=tenant_2 + ) pg_tenant1 = env.postgres.create_start( - f'test_tenants_normal_work_with_safekeepers{with_safekeepers}', + f"test_tenants_normal_work_with_safekeepers{with_safekeepers}", tenant_id=tenant_1, ) pg_tenant2 = env.postgres.create_start( - f'test_tenants_normal_work_with_safekeepers{with_safekeepers}', + f"test_tenants_normal_work_with_safekeepers{with_safekeepers}", tenant_id=tenant_2, ) @@ -41,7 +43,7 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers: cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") cur.execute("SELECT sum(key) FROM t") - assert cur.fetchone() == (5000050000, ) + assert cur.fetchone() == (5000050000,) def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder): @@ -51,11 +53,11 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder): tenant_1, _ = env.neon_cli.create_tenant() tenant_2, _ = env.neon_cli.create_tenant() - timeline_1 = env.neon_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_1) - timeline_2 = env.neon_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_2) + timeline_1 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_1) + timeline_2 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_2) - pg_tenant1 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_1) - pg_tenant2 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_2) + pg_tenant1 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_1) + pg_tenant2 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_2) for pg in [pg_tenant1, pg_tenant2]: with closing(pg.connect()) as conn: @@ -63,29 +65,28 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder): cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") cur.execute("SELECT sum(key) FROM t") - assert cur.fetchone() == (5000050000, ) + assert cur.fetchone() == (5000050000,) collected_metrics = { "pageserver": env.pageserver.http_client().get_metrics(), } for sk in env.safekeepers: - collected_metrics[f'safekeeper{sk.id}'] = sk.http_client().get_metrics_str() + collected_metrics[f"safekeeper{sk.id}"] = sk.http_client().get_metrics_str() for name in collected_metrics: - basepath = os.path.join(neon_env_builder.repo_dir, f'{name}.metrics') + basepath = os.path.join(neon_env_builder.repo_dir, f"{name}.metrics") - with open(basepath, 'w') as stdout_f: + with open(basepath, "w") as stdout_f: print(collected_metrics[name], file=stdout_f, flush=True) all_metrics = [parse_metrics(m, name) for name, m in collected_metrics.items()] ps_metrics = all_metrics[0] sk_metrics = all_metrics[1:] - ttids = [{ - 'tenant_id': tenant_1.hex, 'timeline_id': timeline_1.hex - }, { - 'tenant_id': tenant_2.hex, 'timeline_id': timeline_2.hex - }] + ttids = [ + {"tenant_id": tenant_1.hex, "timeline_id": timeline_1.hex}, + {"tenant_id": tenant_2.hex, "timeline_id": timeline_2.hex}, + ] # Test metrics per timeline for tt in ttids: @@ -105,7 +106,8 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder): log.info(f"Checking common metrics for {metrics.name}") log.info( - f"process_cpu_seconds_total: {metrics.query_one('process_cpu_seconds_total').value}") + f"process_cpu_seconds_total: {metrics.query_one('process_cpu_seconds_total').value}" + ) log.info(f"process_threads: {int(metrics.query_one('process_threads').value)}") log.info( f"process_resident_memory_bytes (MB): {metrics.query_one('process_resident_memory_bytes').value / 1024 / 1024}" diff --git a/test_runner/batch_others/test_tenants_with_remote_storage.py b/test_runner/batch_others/test_tenants_with_remote_storage.py index 636616a45b..a127693c32 100644 --- a/test_runner/batch_others/test_tenants_with_remote_storage.py +++ b/test_runner/batch_others/test_tenants_with_remote_storage.py @@ -12,8 +12,15 @@ from typing import List, Tuple from uuid import UUID import pytest - -from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres, RemoteStorageKind, available_remote_storages, wait_for_last_record_lsn, wait_for_upload +from fixtures.neon_fixtures import ( + NeonEnv, + NeonEnvBuilder, + Postgres, + RemoteStorageKind, + available_remote_storages, + wait_for_last_record_lsn, + wait_for_upload, +) from fixtures.utils import lsn_from_hex @@ -28,7 +35,8 @@ async def tenant_workload(env: NeonEnv, pg: Postgres): await pg_conn.execute("CREATE TABLE t(key int primary key, value text)") for i in range(1, 100): await pg_conn.execute( - f"INSERT INTO t SELECT {i}*1000 + g, 'payload' from generate_series(1,1000) g") + f"INSERT INTO t SELECT {i}*1000 + g, 'payload' from generate_series(1,1000) g" + ) # we rely upon autocommit after each statement # as waiting for acceptors happens there @@ -46,11 +54,11 @@ async def all_tenants_workload(env: NeonEnv, tenants_pgs): await asyncio.gather(*workers) -@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages()) +@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages()) def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind): neon_env_builder.enable_remote_storage( remote_storage_kind=remote_storatge_kind, - test_name='test_tenants_many', + test_name="test_tenants_many", ) env = neon_env_builder.init_start() @@ -61,12 +69,13 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Re # Use a tiny checkpoint distance, to create a lot of layers quickly tenant, _ = env.neon_cli.create_tenant( conf={ - 'checkpoint_distance': '5000000', - }) - env.neon_cli.create_timeline(f'test_tenants_many', tenant_id=tenant) + "checkpoint_distance": "5000000", + } + ) + env.neon_cli.create_timeline(f"test_tenants_many", tenant_id=tenant) pg = env.postgres.create_start( - f'test_tenants_many', + f"test_tenants_many", tenant_id=tenant, ) tenants_pgs.append((tenant, pg)) @@ -77,7 +86,8 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Re pageserver_http = env.pageserver.http_client() for tenant, pg in tenants_pgs: res = pg.safe_psql_many( - ["SHOW neon.tenant_id", "SHOW neon.timeline_id", "SELECT pg_current_wal_flush_lsn()"]) + ["SHOW neon.tenant_id", "SHOW neon.timeline_id", "SELECT pg_current_wal_flush_lsn()"] + ) tenant_id = res[0][0][0] timeline_id = res[1][0][0] current_lsn = lsn_from_hex(res[2][0][0]) diff --git a/test_runner/batch_others/test_timeline_delete.py b/test_runner/batch_others/test_timeline_delete.py index 594475faf4..7a55ffb769 100644 --- a/test_runner/batch_others/test_timeline_delete.py +++ b/test_runner/batch_others/test_timeline_delete.py @@ -1,6 +1,6 @@ from uuid import uuid4 -import pytest +import pytest from fixtures.neon_fixtures import NeonEnv, NeonPageserverApiException, wait_until @@ -17,44 +17,57 @@ def test_timeline_delete(neon_simple_env: NeonEnv): # for non existing tenant: invalid_tenant_id = uuid4() - with pytest.raises(NeonPageserverApiException, - match=f"Tenant {invalid_tenant_id.hex} not found in local tenant state"): + with pytest.raises( + NeonPageserverApiException, + match=f"Tenant {invalid_tenant_id.hex} not found in local tenant state", + ): ps_http.timeline_delete(tenant_id=invalid_tenant_id, timeline_id=invalid_timeline_id) # construct pair of branches to validate that pageserver prohibits # deletion of ancestor timelines when they have child branches parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_parent", "empty") - leaf_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_branch1", - "test_ancestor_branch_delete_parent") + leaf_timeline_id = env.neon_cli.create_branch( + "test_ancestor_branch_delete_branch1", "test_ancestor_branch_delete_parent" + ) ps_http = env.pageserver.http_client() - with pytest.raises(NeonPageserverApiException, - match="Cannot detach timeline which has child timelines"): + with pytest.raises( + NeonPageserverApiException, match="Cannot detach timeline which has child timelines" + ): - timeline_path = env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / parent_timeline_id.hex + timeline_path = ( + env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / parent_timeline_id.hex + ) assert timeline_path.exists() ps_http.timeline_delete(env.initial_tenant, parent_timeline_id) assert not timeline_path.exists() - timeline_path = env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / leaf_timeline_id.hex + timeline_path = ( + env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / leaf_timeline_id.hex + ) assert timeline_path.exists() # retry deletes when compaction or gc is running in pageserver - wait_until(number_of_iterations=3, - interval=0.2, - func=lambda: ps_http.timeline_delete(env.initial_tenant, leaf_timeline_id)) + wait_until( + number_of_iterations=3, + interval=0.2, + func=lambda: ps_http.timeline_delete(env.initial_tenant, leaf_timeline_id), + ) assert not timeline_path.exists() # check 404 - with pytest.raises(NeonPageserverApiException, - match="is not found neither locally nor remotely"): + with pytest.raises( + NeonPageserverApiException, match="is not found neither locally nor remotely" + ): ps_http.timeline_detail(env.initial_tenant, leaf_timeline_id) # FIXME leaves tenant without timelines, should we prevent deletion of root timeline? - wait_until(number_of_iterations=3, - interval=0.2, - func=lambda: ps_http.timeline_delete(env.initial_tenant, parent_timeline_id)) + wait_until( + number_of_iterations=3, + interval=0.2, + func=lambda: ps_http.timeline_delete(env.initial_tenant, parent_timeline_id), + ) diff --git a/test_runner/batch_others/test_timeline_size.py b/test_runner/batch_others/test_timeline_size.py index 4a9359cf43..76342cdf98 100644 --- a/test_runner/batch_others/test_timeline_size.py +++ b/test_runner/batch_others/test_timeline_size.py @@ -1,25 +1,33 @@ -from contextlib import closing import math import random -from uuid import UUID import re -import psycopg2.extras -import psycopg2.errors -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_timeline_local, wait_for_last_flush_lsn -from fixtures.log_helper import log import time +from contextlib import closing +from uuid import UUID +import psycopg2.errors +import psycopg2.extras +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + NeonEnv, + NeonEnvBuilder, + Postgres, + assert_timeline_local, + wait_for_last_flush_lsn, +) from fixtures.utils import get_timeline_dir_size def test_timeline_size(neon_simple_env: NeonEnv): env = neon_simple_env - new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty') + new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "empty") client = env.pageserver.http_client() timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - assert timeline_details['local']['current_logical_size'] == timeline_details['local'][ - 'current_logical_size_non_incremental'] + assert ( + timeline_details["local"]["current_logical_size"] + == timeline_details["local"]["current_logical_size_non_incremental"] + ) pgmain = env.postgres.create_start("test_timeline_size") log.info("postgres is running on 'test_timeline_size' branch") @@ -29,32 +37,40 @@ def test_timeline_size(neon_simple_env: NeonEnv): cur.execute("SHOW neon.timeline_id") cur.execute("CREATE TABLE foo (t text)") - cur.execute(""" + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 10) g - """) + """ + ) res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - local_details = res['local'] - assert local_details["current_logical_size"] == local_details[ - "current_logical_size_non_incremental"] + local_details = res["local"] + assert ( + local_details["current_logical_size"] + == local_details["current_logical_size_non_incremental"] + ) cur.execute("TRUNCATE foo") res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - local_details = res['local'] - assert local_details["current_logical_size"] == local_details[ - "current_logical_size_non_incremental"] + local_details = res["local"] + assert ( + local_details["current_logical_size"] + == local_details["current_logical_size_non_incremental"] + ) def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): env = neon_simple_env - new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty') + new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "empty") client = env.pageserver.http_client() timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - assert timeline_details['local']['current_logical_size'] == timeline_details['local'][ - 'current_logical_size_non_incremental'] + assert ( + timeline_details["local"]["current_logical_size"] + == timeline_details["local"]["current_logical_size_non_incremental"] + ) pgmain = env.postgres.create_start("test_timeline_size") log.info("postgres is running on 'test_timeline_size' branch") @@ -64,32 +80,40 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): cur.execute("SHOW neon.timeline_id") res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - local_details = res['local'] - assert local_details["current_logical_size"] == local_details[ - "current_logical_size_non_incremental"] + local_details = res["local"] + assert ( + local_details["current_logical_size"] + == local_details["current_logical_size_non_incremental"] + ) - cur.execute('CREATE DATABASE foodb') - with closing(pgmain.connect(dbname='foodb')) as conn: + cur.execute("CREATE DATABASE foodb") + with closing(pgmain.connect(dbname="foodb")) as conn: with conn.cursor() as cur2: cur2.execute("CREATE TABLE foo (t text)") - cur2.execute(""" + cur2.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 10) g - """) + """ + ) res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - local_details = res['local'] - assert local_details["current_logical_size"] == local_details[ - "current_logical_size_non_incremental"] + local_details = res["local"] + assert ( + local_details["current_logical_size"] + == local_details["current_logical_size_non_incremental"] + ) - cur.execute('DROP DATABASE foodb') + cur.execute("DROP DATABASE foodb") res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - local_details = res['local'] - assert local_details["current_logical_size"] == local_details[ - "current_logical_size_non_incremental"] + local_details = res["local"] + assert ( + local_details["current_logical_size"] + == local_details["current_logical_size_non_incremental"] + ) # wait until received_lsn_lag is 0 @@ -101,14 +125,17 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60 elapsed = time.time() - started_at if elapsed > timeout: raise RuntimeError( - f"timed out waiting for pageserver to reach pg_current_wal_flush_lsn()") + f"timed out waiting for pageserver to reach pg_current_wal_flush_lsn()" + ) - res = pgmain.safe_psql(''' + res = pgmain.safe_psql( + """ SELECT pg_size_pretty(pg_cluster_size()), pg_wal_lsn_diff(pg_current_wal_flush_lsn(), received_lsn) as received_lsn_lag FROM backpressure_lsns(); - ''')[0] + """ + )[0] log.info(f"pg_cluster_size = {res[0]}, received_lsn_lag = {res[1]}") received_lsn_lag = res[1] @@ -117,17 +144,19 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60 def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - new_timeline_id = env.neon_cli.create_branch('test_timeline_size_quota') + new_timeline_id = env.neon_cli.create_branch("test_timeline_size_quota") client = env.pageserver.http_client() res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - assert res['local']["current_logical_size"] == res['local'][ - "current_logical_size_non_incremental"] + assert ( + res["local"]["current_logical_size"] == res["local"]["current_logical_size_non_incremental"] + ) pgmain = env.postgres.create_start( "test_timeline_size_quota", # Set small limit for the test - config_lines=['neon.max_cluster_size=30MB']) + config_lines=["neon.max_cluster_size=30MB"], + ) log.info("postgres is running on 'test_timeline_size_quota' branch") with closing(pgmain.connect()) as conn: @@ -140,19 +169,23 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): # Insert many rows. This query must fail because of space limit try: - cur.execute(''' + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g - ''') + """ + ) wait_for_pageserver_catchup(pgmain) - cur.execute(''' + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 500000) g - ''') + """ + ) # If we get here, the timeline size limit failed log.error("Query unexpectedly succeeded") @@ -162,17 +195,19 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): log.info(f"Query expectedly failed with: {err}") # drop table to free space - cur.execute('DROP TABLE foo') + cur.execute("DROP TABLE foo") wait_for_pageserver_catchup(pgmain) # create it again and insert some rows. This query must succeed cur.execute("CREATE TABLE foo (t text)") - cur.execute(''' + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 10000) g - ''') + """ + ) wait_for_pageserver_catchup(pgmain) @@ -183,15 +218,17 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): def test_timeline_physical_size_init(neon_simple_env: NeonEnv): env = neon_simple_env - new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_init') + new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_init") pg = env.postgres.create_start("test_timeline_physical_size_init") - pg.safe_psql_many([ - "CREATE TABLE foo (t text)", - """INSERT INTO foo + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text)", + """INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 1000) g""", - ]) + ] + ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) @@ -204,15 +241,17 @@ def test_timeline_physical_size_init(neon_simple_env: NeonEnv): def test_timeline_physical_size_post_checkpoint(neon_simple_env: NeonEnv): env = neon_simple_env - new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_post_checkpoint') + new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_checkpoint") pg = env.postgres.create_start("test_timeline_physical_size_post_checkpoint") - pg.safe_psql_many([ - "CREATE TABLE foo (t text)", - """INSERT INTO foo + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text)", + """INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 1000) g""", - ]) + ] + ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}") @@ -223,19 +262,23 @@ def test_timeline_physical_size_post_checkpoint(neon_simple_env: NeonEnv): def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder): # Disable background compaction as we don't want it to happen after `get_physical_size` request # and before checking the expected size on disk, which makes the assertion failed - neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance=100000, compaction_period='10m'}" + neon_env_builder.pageserver_config_override = ( + "tenant_config={checkpoint_distance=100000, compaction_period='10m'}" + ) env = neon_env_builder.init_start() - new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_post_compaction') + new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_compaction") pg = env.postgres.create_start("test_timeline_physical_size_post_compaction") - pg.safe_psql_many([ - "CREATE TABLE foo (t text)", - """INSERT INTO foo + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text)", + """INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g""", - ]) + ] + ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}") @@ -247,29 +290,32 @@ def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder): # Disable background compaction and GC as we don't want it to happen after `get_physical_size` request # and before checking the expected size on disk, which makes the assertion failed - neon_env_builder.pageserver_config_override = \ - "tenant_config={checkpoint_distance=100000, compaction_period='10m', gc_period='10m', pitr_interval='1s'}" + neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance=100000, compaction_period='10m', gc_period='10m', pitr_interval='1s'}" env = neon_env_builder.init_start() - new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_post_gc') + new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_gc") pg = env.postgres.create_start("test_timeline_physical_size_post_gc") - pg.safe_psql_many([ - "CREATE TABLE foo (t text)", - """INSERT INTO foo + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text)", + """INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g""", - ]) + ] + ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}") - pg.safe_psql(""" + pg.safe_psql( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g - """) + """ + ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}") @@ -284,15 +330,17 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder): def test_timeline_size_metrics(neon_simple_env: NeonEnv): env = neon_simple_env - new_timeline_id = env.neon_cli.create_branch('test_timeline_size_metrics') + new_timeline_id = env.neon_cli.create_branch("test_timeline_size_metrics") pg = env.postgres.create_start("test_timeline_size_metrics") - pg.safe_psql_many([ - "CREATE TABLE foo (t text)", - """INSERT INTO foo + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text)", + """INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g""", - ]) + ] + ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}") @@ -302,7 +350,8 @@ def test_timeline_size_metrics(neon_simple_env: NeonEnv): matches = re.search( f'^pageserver_current_physical_size{{tenant_id="{env.initial_tenant.hex}",timeline_id="{new_timeline_id.hex}"}} (\\S+)$', metrics, - re.MULTILINE) + re.MULTILINE, + ) assert matches tl_physical_size_metric = int(matches.group(1)) @@ -314,7 +363,8 @@ def test_timeline_size_metrics(neon_simple_env: NeonEnv): matches = re.search( f'^pageserver_current_logical_size{{tenant_id="{env.initial_tenant.hex}",timeline_id="{new_timeline_id.hex}"}} (\\S+)$', metrics, - re.MULTILINE) + re.MULTILINE, + ) assert matches tl_logical_size_metric = int(matches.group(1)) @@ -341,7 +391,7 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv): def get_timeline_physical_size(timeline: UUID): res = client.timeline_detail(tenant, timeline) - return res['local']['current_physical_size_non_incremental'] + return res["local"]["current_physical_size_non_incremental"] timeline_total_size = get_timeline_physical_size(timeline) for i in range(10): @@ -350,10 +400,12 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv): timeline = env.neon_cli.create_branch(f"test_tenant_physical_size_{i}", tenant_id=tenant) pg = env.postgres.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant) - pg.safe_psql_many([ - "CREATE TABLE foo (t text)", - f"INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, {n_rows}) g", - ]) + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text)", + f"INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, {n_rows}) g", + ] + ) wait_for_last_flush_lsn(env, pg, tenant, timeline) env.pageserver.safe_psql(f"checkpoint {tenant.hex} {timeline.hex}") @@ -362,7 +414,7 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv): pg.stop() - tenant_physical_size = int(client.tenant_status(tenant_id=tenant)['current_physical_size']) + tenant_physical_size = int(client.tenant_status(tenant_id=tenant)["current_physical_size"]) assert tenant_physical_size == timeline_total_size @@ -372,6 +424,8 @@ def assert_physical_size(env: NeonEnv, tenant_id: UUID, timeline_id: UUID): client = env.pageserver.http_client() res = assert_timeline_local(client, tenant_id, timeline_id) timeline_path = env.timeline_dir(tenant_id, timeline_id) - assert res["local"]["current_physical_size"] == res["local"][ - "current_physical_size_non_incremental"] + assert ( + res["local"]["current_physical_size"] + == res["local"]["current_physical_size_non_incremental"] + ) assert res["local"]["current_physical_size"] == get_timeline_dir_size(timeline_path) diff --git a/test_runner/batch_others/test_twophase.py b/test_runner/batch_others/test_twophase.py index 04e3d0b7bc..e01ba7caef 100644 --- a/test_runner/batch_others/test_twophase.py +++ b/test_runner/batch_others/test_twophase.py @@ -1,7 +1,7 @@ import os -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv # @@ -10,37 +10,37 @@ from fixtures.log_helper import log def test_twophase(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_twophase", "empty") - pg = env.postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5']) + pg = env.postgres.create_start("test_twophase", config_lines=["max_prepared_transactions=5"]) log.info("postgres is running on 'test_twophase' branch") conn = pg.connect() cur = conn.cursor() - cur.execute('CREATE TABLE foo (t text)') + cur.execute("CREATE TABLE foo (t text)") # Prepare a transaction that will insert a row - cur.execute('BEGIN') + cur.execute("BEGIN") cur.execute("INSERT INTO foo VALUES ('one')") cur.execute("PREPARE TRANSACTION 'insert_one'") # Prepare another transaction that will insert a row - cur.execute('BEGIN') + cur.execute("BEGIN") cur.execute("INSERT INTO foo VALUES ('two')") cur.execute("PREPARE TRANSACTION 'insert_two'") # Prepare a transaction that will insert a row - cur.execute('BEGIN') + cur.execute("BEGIN") cur.execute("INSERT INTO foo VALUES ('three')") cur.execute("PREPARE TRANSACTION 'insert_three'") # Prepare another transaction that will insert a row - cur.execute('BEGIN') + cur.execute("BEGIN") cur.execute("INSERT INTO foo VALUES ('four')") cur.execute("PREPARE TRANSACTION 'insert_four'") # On checkpoint state data copied to files in # pg_twophase directory and fsynced - cur.execute('CHECKPOINT') + cur.execute("CHECKPOINT") twophase_files = os.listdir(pg.pg_twophase_dir_path()) log.info(twophase_files) @@ -48,7 +48,7 @@ def test_twophase(neon_simple_env: NeonEnv): cur.execute("COMMIT PREPARED 'insert_three'") cur.execute("ROLLBACK PREPARED 'insert_four'") - cur.execute('CHECKPOINT') + cur.execute("CHECKPOINT") twophase_files = os.listdir(pg.pg_twophase_dir_path()) log.info(twophase_files) @@ -59,8 +59,8 @@ def test_twophase(neon_simple_env: NeonEnv): # Start compute on the new branch pg2 = env.postgres.create_start( - 'test_twophase_prepared', - config_lines=['max_prepared_transactions=5'], + "test_twophase_prepared", + config_lines=["max_prepared_transactions=5"], ) # Check that we restored only needed twophase files @@ -76,9 +76,9 @@ def test_twophase(neon_simple_env: NeonEnv): cur2.execute("COMMIT PREPARED 'insert_one'") cur2.execute("ROLLBACK PREPARED 'insert_two'") - cur2.execute('SELECT * FROM foo') - assert cur2.fetchall() == [('one', ), ('three', )] + cur2.execute("SELECT * FROM foo") + assert cur2.fetchall() == [("one",), ("three",)] # Only one committed insert is visible on the original branch - cur.execute('SELECT * FROM foo') - assert cur.fetchall() == [('three', )] + cur.execute("SELECT * FROM foo") + assert cur.fetchall() == [("three",)] diff --git a/test_runner/batch_others/test_vm_bits.py b/test_runner/batch_others/test_vm_bits.py index 29b55f5b8c..c147c6dff5 100644 --- a/test_runner/batch_others/test_vm_bits.py +++ b/test_runner/batch_others/test_vm_bits.py @@ -1,5 +1,5 @@ -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv # @@ -10,48 +10,50 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_vm_bit_clear", "empty") - pg = env.postgres.create_start('test_vm_bit_clear') + pg = env.postgres.create_start("test_vm_bit_clear") log.info("postgres is running on 'test_vm_bit_clear' branch") pg_conn = pg.connect() cur = pg_conn.cursor() # Install extension containing function needed for test - cur.execute('CREATE EXTENSION neon_test_utils') + cur.execute("CREATE EXTENSION neon_test_utils") # Create a test table and freeze it to set the VM bit. - cur.execute('CREATE TABLE vmtest_delete (id integer PRIMARY KEY)') - cur.execute('INSERT INTO vmtest_delete VALUES (1)') - cur.execute('VACUUM FREEZE vmtest_delete') + cur.execute("CREATE TABLE vmtest_delete (id integer PRIMARY KEY)") + cur.execute("INSERT INTO vmtest_delete VALUES (1)") + cur.execute("VACUUM FREEZE vmtest_delete") - cur.execute('CREATE TABLE vmtest_update (id integer PRIMARY KEY)') - cur.execute('INSERT INTO vmtest_update SELECT g FROM generate_series(1, 1000) g') - cur.execute('VACUUM FREEZE vmtest_update') + cur.execute("CREATE TABLE vmtest_update (id integer PRIMARY KEY)") + cur.execute("INSERT INTO vmtest_update SELECT g FROM generate_series(1, 1000) g") + cur.execute("VACUUM FREEZE vmtest_update") # DELETE and UPDATE the rows. - cur.execute('DELETE FROM vmtest_delete WHERE id = 1') - cur.execute('UPDATE vmtest_update SET id = 5000 WHERE id = 1') + cur.execute("DELETE FROM vmtest_delete WHERE id = 1") + cur.execute("UPDATE vmtest_update SET id = 5000 WHERE id = 1") # Branch at this point, to test that later env.neon_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear") # Clear the buffer cache, to force the VM page to be re-fetched from # the page server - cur.execute('SELECT clear_buffer_cache()') + cur.execute("SELECT clear_buffer_cache()") # Check that an index-only scan doesn't see the deleted row. If the # clearing of the VM bit was not replayed correctly, this would incorrectly # return deleted row. - cur.execute(''' + cur.execute( + """ set enable_seqscan=off; set enable_indexscan=on; set enable_bitmapscan=off; - ''') + """ + ) - cur.execute('SELECT * FROM vmtest_delete WHERE id = 1') - assert (cur.fetchall() == []) - cur.execute('SELECT * FROM vmtest_update WHERE id = 1') - assert (cur.fetchall() == []) + cur.execute("SELECT * FROM vmtest_delete WHERE id = 1") + assert cur.fetchall() == [] + cur.execute("SELECT * FROM vmtest_update WHERE id = 1") + assert cur.fetchall() == [] cur.close() @@ -61,19 +63,21 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv): # a dirty VM page is evicted. If the VM bit was not correctly cleared by the # earlier WAL record, the full-page image hides the problem. Starting a new # server at the right point-in-time avoids that full-page image. - pg_new = env.postgres.create_start('test_vm_bit_clear_new') + pg_new = env.postgres.create_start("test_vm_bit_clear_new") log.info("postgres is running on 'test_vm_bit_clear_new' branch") pg_new_conn = pg_new.connect() cur_new = pg_new_conn.cursor() - cur_new.execute(''' + cur_new.execute( + """ set enable_seqscan=off; set enable_indexscan=on; set enable_bitmapscan=off; - ''') + """ + ) - cur_new.execute('SELECT * FROM vmtest_delete WHERE id = 1') - assert (cur_new.fetchall() == []) - cur_new.execute('SELECT * FROM vmtest_update WHERE id = 1') - assert (cur_new.fetchall() == []) + cur_new.execute("SELECT * FROM vmtest_delete WHERE id = 1") + assert cur_new.fetchall() == [] + cur_new.execute("SELECT * FROM vmtest_update WHERE id = 1") + assert cur_new.fetchall() == [] diff --git a/test_runner/batch_others/test_wal_acceptor.py b/test_runner/batch_others/test_wal_acceptor.py index d922dd0cb4..7710ef86cd 100644 --- a/test_runner/batch_others/test_wal_acceptor.py +++ b/test_runner/batch_others/test_wal_acceptor.py @@ -1,42 +1,59 @@ -import pathlib -import pytest -import random -import time import os +import pathlib +import random import shutil import signal import subprocess import sys import threading +import time import uuid - from contextlib import closing from dataclasses import dataclass, field from pathlib import Path -from fixtures.neon_fixtures import NeonPageserver, PgBin, Etcd, Postgres, RemoteStorageKind, RemoteStorageUsers, Safekeeper, NeonEnv, NeonEnvBuilder, PortDistributor, SafekeeperPort, available_remote_storages, neon_binpath, PgProtocol, wait_for_last_record_lsn, wait_for_upload -from fixtures.utils import get_dir_size, lsn_to_hex, lsn_from_hex, query_scalar -from fixtures.log_helper import log -from typing import List, Optional, Any +from typing import Any, List, Optional from uuid import uuid4 +import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + Etcd, + NeonEnv, + NeonEnvBuilder, + NeonPageserver, + PgBin, + PgProtocol, + PortDistributor, + Postgres, + RemoteStorageKind, + RemoteStorageUsers, + Safekeeper, + SafekeeperPort, + available_remote_storages, + neon_binpath, + wait_for_last_record_lsn, + wait_for_upload, +) +from fixtures.utils import get_dir_size, lsn_from_hex, lsn_to_hex, query_scalar -def wait_lsn_force_checkpoint(tenant_id: str, - timeline_id: str, - pg: Postgres, - ps: NeonPageserver, - pageserver_conn_options={}): - lsn = lsn_from_hex(pg.safe_psql('SELECT pg_current_wal_flush_lsn()')[0][0]) + +def wait_lsn_force_checkpoint( + tenant_id: str, timeline_id: str, pg: Postgres, ps: NeonPageserver, pageserver_conn_options={} +): + lsn = lsn_from_hex(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) log.info(f"pg_current_wal_flush_lsn is {lsn_to_hex(lsn)}, waiting for it on pageserver") auth_token = None - if 'password' in pageserver_conn_options: - auth_token = pageserver_conn_options['password'] + if "password" in pageserver_conn_options: + auth_token = pageserver_conn_options["password"] # wait for the pageserver to catch up - wait_for_last_record_lsn(ps.http_client(auth_token=auth_token), - uuid.UUID(hex=tenant_id), - uuid.UUID(hex=timeline_id), - lsn) + wait_for_last_record_lsn( + ps.http_client(auth_token=auth_token), + uuid.UUID(hex=tenant_id), + uuid.UUID(hex=timeline_id), + lsn, + ) # force checkpoint to advance remote_consistent_lsn with closing(ps.connect(**pageserver_conn_options)) as psconn: @@ -44,10 +61,12 @@ def wait_lsn_force_checkpoint(tenant_id: str, pscur.execute(f"checkpoint {tenant_id} {timeline_id}") # ensure that remote_consistent_lsn is advanced - wait_for_upload(ps.http_client(auth_token=auth_token), - uuid.UUID(hex=tenant_id), - uuid.UUID(hex=timeline_id), - lsn) + wait_for_upload( + ps.http_client(auth_token=auth_token), + uuid.UUID(hex=tenant_id), + uuid.UUID(hex=timeline_id), + lsn, + ) @dataclass @@ -89,7 +108,8 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): with env.pageserver.http_client() as pageserver_http: timeline_details = [ pageserver_http.timeline_detail( - tenant_id=tenant_id, timeline_id=branch_names_to_timeline_ids[branch_name]) + tenant_id=tenant_id, timeline_id=branch_names_to_timeline_ids[branch_name] + ) for branch_name in branch_names ] # All changes visible to pageserver (last_record_lsn) should be @@ -105,14 +125,14 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): for timeline_detail in timeline_details: timeline_id: str = timeline_detail["timeline_id"] - local_timeline_detail = timeline_detail.get('local') + local_timeline_detail = timeline_detail.get("local") if local_timeline_detail is None: log.debug(f"Timeline {timeline_id} is not present locally, skipping") continue m = TimelineMetrics( timeline_id=timeline_id, - last_record_lsn=lsn_from_hex(local_timeline_detail['last_record_lsn']), + last_record_lsn=lsn_from_hex(local_timeline_detail["last_record_lsn"]), ) for sk_m in sk_metrics: m.flush_lsns.append(sk_m.flush_lsn_inexact[(tenant_id.hex, timeline_id)]) @@ -120,14 +140,20 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): for flush_lsn, commit_lsn in zip(m.flush_lsns, m.commit_lsns): # Invariant. May be < when transaction is in progress. - assert commit_lsn <= flush_lsn, f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}" + assert ( + commit_lsn <= flush_lsn + ), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}" # We only call collect_metrics() after a transaction is confirmed by # the compute node, which only happens after a consensus of safekeepers # has confirmed the transaction. We assume majority consensus here. - assert (2 * sum(m.last_record_lsn <= lsn - for lsn in m.flush_lsns) > neon_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}" - assert (2 * sum(m.last_record_lsn <= lsn - for lsn in m.commit_lsns) > neon_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}" + assert ( + 2 * sum(m.last_record_lsn <= lsn for lsn in m.flush_lsns) + > neon_env_builder.num_safekeepers + ), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}" + assert ( + 2 * sum(m.last_record_lsn <= lsn for lsn in m.commit_lsns) + > neon_env_builder.num_safekeepers + ), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}" timeline_metrics.append(m) log.info(f"{message}: {timeline_metrics}") return timeline_metrics @@ -155,8 +181,10 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): collect_metrics("during INSERT INTO") time.sleep(1) except: - log.error("MetricsChecker's thread failed, the test will be failed on .stop() call", - exc_info=True) + log.error( + "MetricsChecker's thread failed, the test will be failed on .stop() call", + exc_info=True, + ) # We want to preserve traceback as well as the exception exc_type, exc_value, exc_tb = sys.exc_info() assert exc_type @@ -183,7 +211,7 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): # Check data for 2/3 timelines for pg in pgs[:-1]: res = pg.safe_psql("SELECT sum(key) FROM t") - assert res[0] == (5000050000, ) + assert res[0] == (5000050000,) final_m = collect_metrics("after SELECT") # Assume that LSNs (a) behave similarly in all timelines; and (b) INSERT INTO alters LSN significantly. @@ -208,8 +236,8 @@ def test_restarts(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = n_acceptors env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_safekeepers_restarts') - pg = env.postgres.create_start('test_safekeepers_restarts') + env.neon_cli.create_branch("test_safekeepers_restarts") + pg = env.postgres.create_start("test_safekeepers_restarts") # we rely upon autocommit after each statement # as waiting for acceptors happens there @@ -217,9 +245,9 @@ def test_restarts(neon_env_builder: NeonEnvBuilder): cur = pg_conn.cursor() failed_node = None - cur.execute('CREATE TABLE t(key int primary key, value text)') + cur.execute("CREATE TABLE t(key int primary key, value text)") for i in range(n_inserts): - cur.execute("INSERT INTO t values (%s, 'payload');", (i + 1, )) + cur.execute("INSERT INTO t values (%s, 'payload');", (i + 1,)) if random.random() <= fault_probability: if failed_node is None: @@ -228,7 +256,7 @@ def test_restarts(neon_env_builder: NeonEnvBuilder): else: failed_node.start() failed_node = None - assert query_scalar(cur, 'SELECT sum(key) FROM t') == 500500 + assert query_scalar(cur, "SELECT sum(key) FROM t") == 500500 # Test that safekeepers push their info to the broker and learn peer status from it @@ -238,7 +266,7 @@ def test_broker(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_broker", "main") - pg = env.postgres.create_start('test_broker') + pg = env.postgres.create_start("test_broker") pg.safe_psql("CREATE TABLE t(key int primary key, value text)") # learn neon timeline from compute @@ -260,9 +288,10 @@ def test_broker(neon_env_builder: NeonEnvBuilder): while True: stat_after = [cli.timeline_status(tenant_id, timeline_id) for cli in clients] if all( - lsn_from_hex(s_after.remote_consistent_lsn) > lsn_from_hex( - s_before.remote_consistent_lsn) for s_after, - s_before in zip(stat_after, stat_before)): + lsn_from_hex(s_after.remote_consistent_lsn) + > lsn_from_hex(s_before.remote_consistent_lsn) + for s_after, s_before in zip(stat_after, stat_before) + ): break elapsed = time.time() - started_at if elapsed > 20: @@ -273,7 +302,7 @@ def test_broker(neon_env_builder: NeonEnvBuilder): # Test that old WAL consumed by peers and pageserver is removed from safekeepers. -@pytest.mark.parametrize('auth_enabled', [False, True]) +@pytest.mark.parametrize("auth_enabled", [False, True]) def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.num_safekeepers = 2 # to advance remote_consistent_lsn @@ -281,16 +310,18 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.auth_enabled = auth_enabled env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_safekeepers_wal_removal') - pg = env.postgres.create_start('test_safekeepers_wal_removal') + env.neon_cli.create_branch("test_safekeepers_wal_removal") + pg = env.postgres.create_start("test_safekeepers_wal_removal") # Note: it is important to insert at least two segments, as currently # control file is synced roughly once in segment range and WAL is not # removed until all horizons are persisted. - pg.safe_psql_many([ - 'CREATE TABLE t(key int primary key, value text)', - "INSERT INTO t SELECT generate_series(1,200000), 'payload'", - ]) + pg.safe_psql_many( + [ + "CREATE TABLE t(key int primary key, value text)", + "INSERT INTO t SELECT generate_series(1,200000), 'payload'", + ] + ) tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] @@ -298,12 +329,12 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): # force checkpoint to advance remote_consistent_lsn pageserver_conn_options = {} if auth_enabled: - pageserver_conn_options['password'] = env.auth_keys.generate_tenant_token(tenant_id) + pageserver_conn_options["password"] = env.auth_keys.generate_tenant_token(tenant_id) wait_lsn_force_checkpoint(tenant_id, timeline_id, pg, env.pageserver, pageserver_conn_options) # We will wait for first segment removal. Make sure they exist for starter. first_segments = [ - os.path.join(sk.data_dir(), tenant_id, timeline_id, '000000010000000000000001') + os.path.join(sk.data_dir(), tenant_id, timeline_id, "000000010000000000000001") for sk in env.safekeepers ] assert all(os.path.exists(p) for p in first_segments) @@ -312,25 +343,33 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): http_cli = env.safekeepers[0].http_client() else: http_cli = env.safekeepers[0].http_client( - auth_token=env.auth_keys.generate_tenant_token(tenant_id)) + auth_token=env.auth_keys.generate_tenant_token(tenant_id) + ) http_cli_other = env.safekeepers[0].http_client( - auth_token=env.auth_keys.generate_tenant_token(uuid4().hex)) + auth_token=env.auth_keys.generate_tenant_token(uuid4().hex) + ) http_cli_noauth = env.safekeepers[0].http_client() # Pretend WAL is offloaded to s3. if auth_enabled: - old_backup_lsn = http_cli.timeline_status(tenant_id=tenant_id, - timeline_id=timeline_id).backup_lsn - assert 'FFFFFFFF/FEFFFFFF' != old_backup_lsn + old_backup_lsn = http_cli.timeline_status( + tenant_id=tenant_id, timeline_id=timeline_id + ).backup_lsn + assert "FFFFFFFF/FEFFFFFF" != old_backup_lsn for cli in [http_cli_other, http_cli_noauth]: - with pytest.raises(cli.HTTPError, match='Forbidden|Unauthorized'): - cli.record_safekeeper_info(tenant_id, - timeline_id, {'backup_lsn': 'FFFFFFFF/FEFFFFFF'}) - assert old_backup_lsn == http_cli.timeline_status(tenant_id=tenant_id, - timeline_id=timeline_id).backup_lsn - http_cli.record_safekeeper_info(tenant_id, timeline_id, {'backup_lsn': 'FFFFFFFF/FEFFFFFF'}) - assert 'FFFFFFFF/FEFFFFFF' == http_cli.timeline_status(tenant_id=tenant_id, - timeline_id=timeline_id).backup_lsn + with pytest.raises(cli.HTTPError, match="Forbidden|Unauthorized"): + cli.record_safekeeper_info( + tenant_id, timeline_id, {"backup_lsn": "FFFFFFFF/FEFFFFFF"} + ) + assert ( + old_backup_lsn + == http_cli.timeline_status(tenant_id=tenant_id, timeline_id=timeline_id).backup_lsn + ) + http_cli.record_safekeeper_info(tenant_id, timeline_id, {"backup_lsn": "FFFFFFFF/FEFFFFFF"}) + assert ( + "FFFFFFFF/FEFFFFFF" + == http_cli.timeline_status(tenant_id=tenant_id, timeline_id=timeline_id).backup_lsn + ) # wait till first segment is removed on all safekeepers started_at = time.time() @@ -355,7 +394,8 @@ def wait_segment_offload(tenant_id, timeline_id, live_sk, seg_end): elapsed = time.time() - started_at if elapsed > 30: raise RuntimeError( - f"timed out waiting {elapsed:.0f}s for segment ending at {seg_end} get offloaded") + f"timed out waiting {elapsed:.0f}s for segment ending at {seg_end} get offloaded" + ) time.sleep(0.5) @@ -364,8 +404,9 @@ def wait_wal_trim(tenant_id, timeline_id, sk, target_size): http_cli = sk.http_client() while True: tli_status = http_cli.timeline_status(tenant_id, timeline_id) - sk_wal_size = get_dir_size(os.path.join(sk.data_dir(), tenant_id, - timeline_id)) / 1024 / 1024 + sk_wal_size = ( + get_dir_size(os.path.join(sk.data_dir(), tenant_id, timeline_id)) / 1024 / 1024 + ) log.info(f"Safekeeper id={sk.id} wal_size={sk_wal_size:.2f}MB status={tli_status}") if sk_wal_size <= target_size: @@ -379,21 +420,21 @@ def wait_wal_trim(tenant_id, timeline_id, sk, target_size): time.sleep(0.5) -@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages()) +@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages()) def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind): neon_env_builder.num_safekeepers = 3 neon_env_builder.enable_remote_storage( remote_storage_kind=remote_storatge_kind, - test_name='test_safekeepers_wal_backup', + test_name="test_safekeepers_wal_backup", ) neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_safekeepers_wal_backup') - pg = env.postgres.create_start('test_safekeepers_wal_backup') + env.neon_cli.create_branch("test_safekeepers_wal_backup") + pg = env.postgres.create_start("test_safekeepers_wal_backup") # learn neon timeline from compute tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] @@ -401,11 +442,11 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Remo pg_conn = pg.connect() cur = pg_conn.cursor() - cur.execute('create table t(key int, value text)') + cur.execute("create table t(key int, value text)") # Shut down subsequently each of safekeepers and fill a segment while sk is # down; ensure segment gets offloaded by others. - offloaded_seg_end = ['0/2000000', '0/3000000', '0/4000000'] + offloaded_seg_end = ["0/2000000", "0/3000000", "0/4000000"] for victim, seg_end in zip(env.safekeepers, offloaded_seg_end): victim.stop() # roughly fills one segment @@ -419,36 +460,36 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Remo # put one of safekeepers down again env.safekeepers[0].stop() # restart postgres - pg.stop_and_destroy().create_start('test_safekeepers_wal_backup') + pg.stop_and_destroy().create_start("test_safekeepers_wal_backup") # and ensure offloading still works with closing(pg.connect()) as conn: with conn.cursor() as cur: cur.execute("insert into t select generate_series(1,250000), 'payload'") - wait_segment_offload(tenant_id, timeline_id, env.safekeepers[1], '0/5000000') + wait_segment_offload(tenant_id, timeline_id, env.safekeepers[1], "0/5000000") -@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages()) +@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages()) def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind): neon_env_builder.num_safekeepers = 3 neon_env_builder.enable_remote_storage( remote_storage_kind=remote_storatge_kind, - test_name='test_s3_wal_replay', + test_name="test_s3_wal_replay", ) neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_s3_wal_replay') + env.neon_cli.create_branch("test_s3_wal_replay") env.pageserver.stop() - pageserver_tenants_dir = os.path.join(env.repo_dir, 'tenants') - pageserver_fresh_copy = os.path.join(env.repo_dir, 'tenants_fresh') + pageserver_tenants_dir = os.path.join(env.repo_dir, "tenants") + pageserver_fresh_copy = os.path.join(env.repo_dir, "tenants_fresh") log.info(f"Creating a copy of pageserver in a fresh state at {pageserver_fresh_copy}") shutil.copytree(pageserver_tenants_dir, pageserver_fresh_copy) env.pageserver.start() - pg = env.postgres.create_start('test_s3_wal_replay') + pg = env.postgres.create_start("test_s3_wal_replay") # learn neon timeline from compute tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] @@ -462,7 +503,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: R cur.execute("insert into t values (1, 'payload')") expected_sum += 1 - offloaded_seg_end = ['0/3000000'] + offloaded_seg_end = ["0/3000000"] for seg_end in offloaded_seg_end: # roughly fills two segments cur.execute("insert into t select generate_series(1,500000), 'payload'") @@ -476,28 +517,30 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: R # advance remote_consistent_lsn to trigger WAL trimming # this LSN should be less than commit_lsn, so timeline will be active=true in safekeepers, to push etcd updates env.safekeepers[0].http_client().record_safekeeper_info( - tenant_id, timeline_id, {'remote_consistent_lsn': offloaded_seg_end[-1]}) + tenant_id, timeline_id, {"remote_consistent_lsn": offloaded_seg_end[-1]} + ) for sk in env.safekeepers: # require WAL to be trimmed, so no more than one segment is left on disk wait_wal_trim(tenant_id, timeline_id, sk, 16 * 1.5) - last_lsn = query_scalar(cur, 'SELECT pg_current_wal_flush_lsn()') + last_lsn = query_scalar(cur, "SELECT pg_current_wal_flush_lsn()") pageserver_lsn = env.pageserver.http_client().timeline_detail( - uuid.UUID(tenant_id), uuid.UUID((timeline_id)))["local"]["last_record_lsn"] + uuid.UUID(tenant_id), uuid.UUID((timeline_id)) + )["local"]["last_record_lsn"] lag = lsn_from_hex(last_lsn) - lsn_from_hex(pageserver_lsn) log.info( - f'Pageserver last_record_lsn={pageserver_lsn}; flush_lsn={last_lsn}; lag before replay is {lag / 1024}kb' + f"Pageserver last_record_lsn={pageserver_lsn}; flush_lsn={last_lsn}; lag before replay is {lag / 1024}kb" ) # replace pageserver with a fresh copy pg.stop_and_destroy() env.pageserver.stop() - log.info(f'Removing current pageserver state at {pageserver_tenants_dir}') + log.info(f"Removing current pageserver state at {pageserver_tenants_dir}") shutil.rmtree(pageserver_tenants_dir) - log.info(f'Copying fresh pageserver state from {pageserver_fresh_copy}') + log.info(f"Copying fresh pageserver state from {pageserver_fresh_copy}") shutil.move(pageserver_fresh_copy, pageserver_tenants_dir) # start pageserver and wait for replay @@ -509,39 +552,43 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: R while True: elapsed = time.time() - started_at if elapsed > wait_lsn_timeout: - raise RuntimeError(f'Timed out waiting for WAL redo') + raise RuntimeError(f"Timed out waiting for WAL redo") pageserver_lsn = env.pageserver.http_client().timeline_detail( - uuid.UUID(tenant_id), uuid.UUID((timeline_id)))["local"]["last_record_lsn"] + uuid.UUID(tenant_id), uuid.UUID((timeline_id)) + )["local"]["last_record_lsn"] lag = lsn_from_hex(last_lsn) - lsn_from_hex(pageserver_lsn) if time.time() > last_debug_print + 10 or lag <= 0: last_debug_print = time.time() - log.info(f'Pageserver last_record_lsn={pageserver_lsn}; lag is {lag / 1024}kb') + log.info(f"Pageserver last_record_lsn={pageserver_lsn}; lag is {lag / 1024}kb") if lag <= 0: break time.sleep(1) - log.info(f'WAL redo took {elapsed} s') + log.info(f"WAL redo took {elapsed} s") # verify data - pg.create_start('test_s3_wal_replay') + pg.create_start("test_s3_wal_replay") assert pg.safe_psql("select sum(key) from t")[0][0] == expected_sum class ProposerPostgres(PgProtocol): """Object for running postgres without NeonEnv""" - def __init__(self, - pgdata_dir: str, - pg_bin, - timeline_id: uuid.UUID, - tenant_id: uuid.UUID, - listen_addr: str, - port: int): - super().__init__(host=listen_addr, port=port, user='cloud_admin', dbname='postgres') + + def __init__( + self, + pgdata_dir: str, + pg_bin, + timeline_id: uuid.UUID, + tenant_id: uuid.UUID, + listen_addr: str, + port: int, + ): + super().__init__(host=listen_addr, port=port, user="cloud_admin", dbname="postgres") self.pgdata_dir: str = pgdata_dir self.pg_bin: PgBin = pg_bin @@ -551,15 +598,15 @@ class ProposerPostgres(PgProtocol): self.port: int = port def pg_data_dir_path(self) -> str: - """ Path to data directory """ + """Path to data directory""" return self.pgdata_dir def config_file_path(self) -> str: - """ Path to postgresql.conf """ - return os.path.join(self.pgdata_dir, 'postgresql.conf') + """Path to postgresql.conf""" + return os.path.join(self.pgdata_dir, "postgresql.conf") def create_dir_config(self, safekeepers: str): - """ Create dir and config for running --sync-safekeepers """ + """Create dir and config for running --sync-safekeepers""" pathlib.Path(self.pg_data_dir_path()).mkdir(exist_ok=True) with open(self.config_file_path(), "w") as f: @@ -588,36 +635,36 @@ class ProposerPostgres(PgProtocol): } basepath = self.pg_bin.run_capture(command, env) - stdout_filename = basepath + '.stdout' + stdout_filename = basepath + ".stdout" - with open(stdout_filename, 'r') as stdout_f: + with open(stdout_filename, "r") as stdout_f: stdout = stdout_f.read() return stdout.strip("\n ") def initdb(self): - """ Run initdb """ + """Run initdb""" args = ["initdb", "-U", "cloud_admin", "-D", self.pg_data_dir_path()] self.pg_bin.run(args) def start(self): - """ Start postgres with pg_ctl """ + """Start postgres with pg_ctl""" log_path = os.path.join(self.pg_data_dir_path(), "pg.log") args = ["pg_ctl", "-D", self.pg_data_dir_path(), "-l", log_path, "-w", "start"] self.pg_bin.run(args) def stop(self): - """ Stop postgres with pg_ctl """ + """Stop postgres with pg_ctl""" args = ["pg_ctl", "-D", self.pg_data_dir_path(), "-m", "immediate", "-w", "stop"] self.pg_bin.run(args) # insert wal in all safekeepers and run sync on proposer -def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder, - pg_bin: PgBin, - port_distributor: PortDistributor): +def test_sync_safekeepers( + neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, port_distributor: PortDistributor +): # We don't really need the full environment for this test, just the # safekeepers would be enough. @@ -629,12 +676,9 @@ def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder, # write config for proposer pgdata_dir = os.path.join(env.repo_dir, "proposer_pgdata") - pg = ProposerPostgres(pgdata_dir, - pg_bin, - timeline_id, - tenant_id, - '127.0.0.1', - port_distributor.get_port()) + pg = ProposerPostgres( + pgdata_dir, pg_bin, timeline_id, tenant_id, "127.0.0.1", port_distributor.get_port() + ) pg.create_dir_config(env.get_safekeeper_connstrs()) # valid lsn, which is not in the segment start, nor in zero segment @@ -669,13 +713,13 @@ def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder, assert all(lsn_after_sync == lsn for lsn in lsn_after_append) -@pytest.mark.parametrize('auth_enabled', [False, True]) +@pytest.mark.parametrize("auth_enabled", [False, True]) def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.auth_enabled = auth_enabled env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_timeline_status') - pg = env.postgres.create_start('test_timeline_status') + env.neon_cli.create_branch("test_timeline_status") + pg = env.postgres.create_start("test_timeline_status") wa = env.safekeepers[0] @@ -690,7 +734,8 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): wa_http_cli = wa.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id)) wa_http_cli.check_status() wa_http_cli_bad = wa.http_client( - auth_token=env.auth_keys.generate_tenant_token(uuid4().hex)) + auth_token=env.auth_keys.generate_tenant_token(uuid4().hex) + ) wa_http_cli_bad.check_status() wa_http_cli_noauth = wa.http_client() wa_http_cli_noauth.check_status() @@ -702,7 +747,7 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): if auth_enabled: for cli in [wa_http_cli_bad, wa_http_cli_noauth]: - with pytest.raises(cli.HTTPError, match='Forbidden|Unauthorized'): + with pytest.raises(cli.HTTPError, match="Forbidden|Unauthorized"): cli.timeline_status(tenant_id, timeline_id) pg.safe_psql("create table t(i int)") @@ -720,19 +765,23 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): class SafekeeperEnv: - def __init__(self, - repo_dir: Path, - port_distributor: PortDistributor, - pg_bin: PgBin, - num_safekeepers: int = 1): + def __init__( + self, + repo_dir: Path, + port_distributor: PortDistributor, + pg_bin: PgBin, + num_safekeepers: int = 1, + ): self.repo_dir = repo_dir self.port_distributor = port_distributor - self.broker = Etcd(datadir=os.path.join(self.repo_dir, "etcd"), - port=self.port_distributor.get_port(), - peer_port=self.port_distributor.get_port()) + self.broker = Etcd( + datadir=os.path.join(self.repo_dir, "etcd"), + port=self.port_distributor.get_port(), + peer_port=self.port_distributor.get_port(), + ) self.pg_bin = pg_bin self.num_safekeepers = num_safekeepers - self.bin_safekeeper = os.path.join(str(neon_binpath), 'safekeeper') + self.bin_safekeeper = os.path.join(str(neon_binpath), "safekeeper") self.safekeepers: Optional[List[subprocess.CompletedProcess[Any]]] = None self.postgres: Optional[ProposerPostgres] = None self.tenant_id: Optional[uuid.UUID] = None @@ -778,23 +827,25 @@ class SafekeeperEnv: str(i), "--broker-endpoints", self.broker.client_url(), - "--daemonize" + "--daemonize", ] log.info(f'Running command "{" ".join(args)}"') return subprocess.run(args, check=True) def get_safekeeper_connstrs(self): - return ','.join([sk_proc.args[2] for sk_proc in self.safekeepers]) + return ",".join([sk_proc.args[2] for sk_proc in self.safekeepers]) def create_postgres(self): pgdata_dir = os.path.join(self.repo_dir, "proposer_pgdata") - pg = ProposerPostgres(pgdata_dir, - self.pg_bin, - self.timeline_id, - self.tenant_id, - "127.0.0.1", - self.port_distributor.get_port()) + pg = ProposerPostgres( + pgdata_dir, + self.pg_bin, + self.timeline_id, + self.tenant_id, + "127.0.0.1", + self.port_distributor.get_port(), + ) pg.initdb() pg.create_dir_config(self.get_safekeeper_connstrs()) return pg @@ -811,7 +862,7 @@ class SafekeeperEnv: return self def __exit__(self, exc_type, exc_value, traceback): - log.info('Cleaning up all safekeeper and compute nodes') + log.info("Cleaning up all safekeeper and compute nodes") # Stop all the nodes if self.postgres is not None: @@ -821,9 +872,9 @@ class SafekeeperEnv: self.kill_safekeeper(sk_proc.args[6]) -def test_safekeeper_without_pageserver(test_output_dir: str, - port_distributor: PortDistributor, - pg_bin: PgBin): +def test_safekeeper_without_pageserver( + test_output_dir: str, port_distributor: PortDistributor, pg_bin: PgBin +): # Create the environment in the test-specific output dir repo_dir = Path(os.path.join(test_output_dir, "repo")) @@ -845,19 +896,19 @@ def test_safekeeper_without_pageserver(test_output_dir: str, def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): def safekeepers_guc(env: NeonEnv, sk_names: List[int]) -> str: - return ','.join([f'localhost:{sk.port.pg}' for sk in env.safekeepers if sk.id in sk_names]) + return ",".join([f"localhost:{sk.port.pg}" for sk in env.safekeepers if sk.id in sk_names]) def execute_payload(pg: Postgres): with closing(pg.connect()) as conn: with conn.cursor() as cur: # we rely upon autocommit after each statement # as waiting for acceptors happens there - cur.execute('CREATE TABLE IF NOT EXISTS t(key int, value text)') + cur.execute("CREATE TABLE IF NOT EXISTS t(key int, value text)") cur.execute("INSERT INTO t VALUES (0, 'something')") - sum_before = query_scalar(cur, 'SELECT SUM(key) FROM t') + sum_before = query_scalar(cur, "SELECT SUM(key) FROM t") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") - sum_after = query_scalar(cur, 'SELECT SUM(key) FROM t') + sum_after = query_scalar(cur, "SELECT SUM(key) FROM t") assert sum_after == sum_before + 5000050000 def show_statuses(safekeepers: List[Safekeeper], tenant_id: str, timeline_id: str): @@ -871,12 +922,12 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 4 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_replace_safekeeper') + env.neon_cli.create_branch("test_replace_safekeeper") log.info("Use only first 3 safekeepers") env.safekeepers[3].stop() active_safekeepers = [1, 2, 3] - pg = env.postgres.create('test_replace_safekeeper') + pg = env.postgres.create("test_replace_safekeeper") pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) pg.start() @@ -914,7 +965,7 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): show_statuses(env.safekeepers, tenant_id, timeline_id) log.info("Recreate postgres to replace failed sk1 with new sk4") - pg.stop_and_destroy().create('test_replace_safekeeper') + pg.stop_and_destroy().create("test_replace_safekeeper") active_safekeepers = [2, 3, 4] env.safekeepers[3].start() pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) @@ -934,16 +985,16 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): # of WAL segments. def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder): # used to calculate delta in collect_stats - last_lsn = .0 + last_lsn = 0.0 # returns LSN and pg_wal size, all in MB def collect_stats(pg: Postgres, cur, enable_logs=True): nonlocal last_lsn assert pg.pgdata_dir is not None - log.info('executing INSERT to generate WAL') + log.info("executing INSERT to generate WAL") current_lsn = lsn_from_hex(query_scalar(cur, "select pg_current_wal_lsn()")) / 1024 / 1024 - pg_wal_size = get_dir_size(os.path.join(pg.pgdata_dir, 'pg_wal')) / 1024 / 1024 + pg_wal_size = get_dir_size(os.path.join(pg.pgdata_dir, "pg_wal")) / 1024 / 1024 if enable_logs: log.info(f"LSN delta: {current_lsn - last_lsn} MB, current WAL size: {pg_wal_size} MB") last_lsn = current_lsn @@ -956,15 +1007,16 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_wal_deleted_after_broadcast') + env.neon_cli.create_branch("test_wal_deleted_after_broadcast") # Adjust checkpoint config to prevent keeping old WAL segments pg = env.postgres.create_start( - 'test_wal_deleted_after_broadcast', - config_lines=['min_wal_size=32MB', 'max_wal_size=32MB', 'log_checkpoints=on']) + "test_wal_deleted_after_broadcast", + config_lines=["min_wal_size=32MB", "max_wal_size=32MB", "log_checkpoints=on"], + ) pg_conn = pg.connect() cur = pg_conn.cursor() - cur.execute('CREATE TABLE t(key int, value text)') + cur.execute("CREATE TABLE t(key int, value text)") collect_stats(pg, cur) @@ -973,15 +1025,15 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder): generate_wal(cur) collect_stats(pg, cur) - log.info('executing checkpoint') - cur.execute('CHECKPOINT') + log.info("executing checkpoint") + cur.execute("CHECKPOINT") wal_size_after_checkpoint = collect_stats(pg, cur)[1] # there shouldn't be more than 2 WAL segments (but dir may have archive_status files) assert wal_size_after_checkpoint < 16 * 2.5 -@pytest.mark.parametrize('auth_enabled', [False, True]) +@pytest.mark.parametrize("auth_enabled", [False, True]) def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.num_safekeepers = 1 neon_env_builder.auth_enabled = auth_enabled @@ -989,25 +1041,25 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): # Create two tenants: one will be deleted, other should be preserved. tenant_id = env.initial_tenant.hex - timeline_id_1 = env.neon_cli.create_branch('br1').hex # Active, delete explicitly - timeline_id_2 = env.neon_cli.create_branch('br2').hex # Inactive, delete explicitly - timeline_id_3 = env.neon_cli.create_branch('br3').hex # Active, delete with the tenant - timeline_id_4 = env.neon_cli.create_branch('br4').hex # Inactive, delete with the tenant + timeline_id_1 = env.neon_cli.create_branch("br1").hex # Active, delete explicitly + timeline_id_2 = env.neon_cli.create_branch("br2").hex # Inactive, delete explicitly + timeline_id_3 = env.neon_cli.create_branch("br3").hex # Active, delete with the tenant + timeline_id_4 = env.neon_cli.create_branch("br4").hex # Inactive, delete with the tenant tenant_id_other_uuid, timeline_id_other_uuid = env.neon_cli.create_tenant() tenant_id_other = tenant_id_other_uuid.hex timeline_id_other = timeline_id_other_uuid.hex # Populate branches - pg_1 = env.postgres.create_start('br1') - pg_2 = env.postgres.create_start('br2') - pg_3 = env.postgres.create_start('br3') - pg_4 = env.postgres.create_start('br4') - pg_other = env.postgres.create_start('main', tenant_id=uuid.UUID(hex=tenant_id_other)) + pg_1 = env.postgres.create_start("br1") + pg_2 = env.postgres.create_start("br2") + pg_3 = env.postgres.create_start("br3") + pg_4 = env.postgres.create_start("br4") + pg_other = env.postgres.create_start("main", tenant_id=uuid.UUID(hex=tenant_id_other)) for pg in [pg_1, pg_2, pg_3, pg_4, pg_other]: with closing(pg.connect()) as conn: with conn.cursor() as cur: - cur.execute('CREATE TABLE t(key int primary key)') + cur.execute("CREATE TABLE t(key int primary key)") sk = env.safekeepers[0] sk_data_dir = Path(sk.data_dir()) if not auth_enabled: @@ -1016,7 +1068,8 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): else: sk_http = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id)) sk_http_other = sk.http_client( - auth_token=env.auth_keys.generate_tenant_token(tenant_id_other)) + auth_token=env.auth_keys.generate_tenant_token(tenant_id_other) + ) sk_http_noauth = sk.http_client() assert (sk_data_dir / tenant_id / timeline_id_1).is_dir() assert (sk_data_dir / tenant_id / timeline_id_2).is_dir() @@ -1034,7 +1087,7 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): for pg in [pg_1, pg_3, pg_other]: with closing(pg.connect()) as conn: with conn.cursor() as cur: - cur.execute('INSERT INTO t (key) VALUES (1)') + cur.execute("INSERT INTO t (key) VALUES (1)") # Remove initial tenant's br1 (active) assert sk_http.timeline_delete_force(tenant_id, timeline_id_1) == { @@ -1049,7 +1102,8 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): # Ensure repeated deletion succeeds assert sk_http.timeline_delete_force(tenant_id, timeline_id_1) == { - "dir_existed": False, "was_active": False + "dir_existed": False, + "was_active": False, } assert not (sk_data_dir / tenant_id / timeline_id_1).exists() assert (sk_data_dir / tenant_id / timeline_id_2).is_dir() @@ -1060,9 +1114,9 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): if auth_enabled: # Ensure we cannot delete the other tenant for sk_h in [sk_http, sk_http_noauth]: - with pytest.raises(sk_h.HTTPError, match='Forbidden|Unauthorized'): + with pytest.raises(sk_h.HTTPError, match="Forbidden|Unauthorized"): assert sk_h.timeline_delete_force(tenant_id_other, timeline_id_other) - with pytest.raises(sk_h.HTTPError, match='Forbidden|Unauthorized'): + with pytest.raises(sk_h.HTTPError, match="Forbidden|Unauthorized"): assert sk_h.tenant_delete_force(tenant_id_other) assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir() @@ -1078,7 +1132,7 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir() # Remove non-existing branch, should succeed - assert sk_http.timeline_delete_force(tenant_id, '00' * 16) == { + assert sk_http.timeline_delete_force(tenant_id, "00" * 16) == { "dir_existed": False, "was_active": False, } @@ -1107,4 +1161,4 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): sk_http_other.timeline_status(tenant_id_other, timeline_id_other) with closing(pg_other.connect()) as conn: with conn.cursor() as cur: - cur.execute('INSERT INTO t (key) VALUES (123)') + cur.execute("INSERT INTO t (key) VALUES (123)") diff --git a/test_runner/batch_others/test_wal_acceptor_async.py b/test_runner/batch_others/test_wal_acceptor_async.py index e1d3ba0919..83285e0cbe 100644 --- a/test_runner/batch_others/test_wal_acceptor_async.py +++ b/test_runner/batch_others/test_wal_acceptor_async.py @@ -1,17 +1,16 @@ import asyncio -import uuid - -import asyncpg import random import time - -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper -from fixtures.log_helper import getLogger -from fixtures.utils import lsn_from_hex, lsn_to_hex -from typing import List, Optional +import uuid from dataclasses import dataclass +from typing import List, Optional -log = getLogger('root.safekeeper_async') +import asyncpg +from fixtures.log_helper import getLogger +from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper +from fixtures.utils import lsn_from_hex, lsn_to_hex + +log = getLogger("root.safekeeper_async") class BankClient(object): @@ -21,21 +20,22 @@ class BankClient(object): self.init_amount = init_amount async def initdb(self): - await self.conn.execute('DROP TABLE IF EXISTS bank_accs') - await self.conn.execute('CREATE TABLE bank_accs(uid int primary key, amount int)') + await self.conn.execute("DROP TABLE IF EXISTS bank_accs") + await self.conn.execute("CREATE TABLE bank_accs(uid int primary key, amount int)") await self.conn.execute( - ''' + """ INSERT INTO bank_accs SELECT *, $1 FROM generate_series(0, $2) - ''', + """, self.init_amount, - self.n_accounts - 1) - await self.conn.execute('DROP TABLE IF EXISTS bank_log') - await self.conn.execute('CREATE TABLE bank_log(from_uid int, to_uid int, amount int)') + self.n_accounts - 1, + ) + await self.conn.execute("DROP TABLE IF EXISTS bank_log") + await self.conn.execute("CREATE TABLE bank_log(from_uid int, to_uid int, amount int)") async def check_invariant(self): - row = await self.conn.fetchrow('SELECT sum(amount) AS sum FROM bank_accs') - assert row['sum'] == self.n_accounts * self.init_amount + row = await self.conn.fetchrow("SELECT sum(amount) AS sum FROM bank_accs") + assert row["sum"] == self.n_accounts * self.init_amount async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount): @@ -45,17 +45,17 @@ async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount): async with conn.transaction(): await conn.execute( - 'UPDATE bank_accs SET amount = amount + ($1) WHERE uid = $2', + "UPDATE bank_accs SET amount = amount + ($1) WHERE uid = $2", amount, to_uid, ) await conn.execute( - 'UPDATE bank_accs SET amount = amount - ($1) WHERE uid = $2', + "UPDATE bank_accs SET amount = amount - ($1) WHERE uid = $2", amount, from_uid, ) await conn.execute( - 'INSERT INTO bank_log VALUES ($1, $2, $3)', + "INSERT INTO bank_log VALUES ($1, $2, $3)", from_uid, to_uid, amount, @@ -80,12 +80,12 @@ class WorkerStats(object): assert all(cnt > 0 for cnt in self.counters) progress = sum(self.counters) - log.info('All workers made {} transactions'.format(progress)) + log.info("All workers made {} transactions".format(progress)) async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accounts, max_transfer): pg_conn = await pg.connect_async() - log.debug('Started worker {}'.format(worker_id)) + log.debug("Started worker {}".format(worker_id)) while stats.running: from_uid = random.randint(0, n_accounts - 1) @@ -95,19 +95,21 @@ async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accou await bank_transfer(pg_conn, from_uid, to_uid, amount) stats.inc_progress(worker_id) - log.debug('Executed transfer({}) {} => {}'.format(amount, from_uid, to_uid)) + log.debug("Executed transfer({}) {} => {}".format(amount, from_uid, to_uid)) - log.debug('Finished worker {}'.format(worker_id)) + log.debug("Finished worker {}".format(worker_id)) await pg_conn.close() -async def wait_for_lsn(safekeeper: Safekeeper, - tenant_id: str, - timeline_id: str, - wait_lsn: str, - polling_interval=1, - timeout=60): +async def wait_for_lsn( + safekeeper: Safekeeper, + tenant_id: str, + timeline_id: str, + wait_lsn: str, + polling_interval=1, + timeout=60, +): """ Poll flush_lsn from safekeeper until it's greater or equal than provided wait_lsn. To do that, timeline_status is fetched from @@ -119,7 +121,7 @@ async def wait_for_lsn(safekeeper: Safekeeper, flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn log.info( - f'Safekeeper at port {safekeeper.port.pg} has flush_lsn {flush_lsn}, waiting for lsn {wait_lsn}' + f"Safekeeper at port {safekeeper.port.pg} has flush_lsn {flush_lsn}, waiting for lsn {wait_lsn}" ) while lsn_from_hex(wait_lsn) > lsn_from_hex(flush_lsn): @@ -131,22 +133,24 @@ async def wait_for_lsn(safekeeper: Safekeeper, await asyncio.sleep(polling_interval) flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn - log.debug(f'safekeeper port={safekeeper.port.pg} flush_lsn={flush_lsn} wait_lsn={wait_lsn}') + log.debug(f"safekeeper port={safekeeper.port.pg} flush_lsn={flush_lsn} wait_lsn={wait_lsn}") # This test will run several iterations and check progress in each of them. # On each iteration 1 acceptor is stopped, and 2 others should allow # background workers execute transactions. In the end, state should remain # consistent. -async def run_restarts_under_load(env: NeonEnv, - pg: Postgres, - acceptors: List[Safekeeper], - n_workers=10, - n_accounts=100, - init_amount=100000, - max_transfer=100, - period_time=4, - iterations=10): +async def run_restarts_under_load( + env: NeonEnv, + pg: Postgres, + acceptors: List[Safekeeper], + n_workers=10, + n_accounts=100, + init_amount=100000, + max_transfer=100, + period_time=4, + iterations=10, +): # Set timeout for this test at 5 minutes. It should be enough for test to complete, # taking into account that this timeout is checked only at the beginning of every iteration. test_timeout_at = time.monotonic() + 5 * 60 @@ -166,20 +170,21 @@ async def run_restarts_under_load(env: NeonEnv, workers.append(asyncio.create_task(worker)) for it in range(iterations): - assert time.monotonic() < test_timeout_at, 'test timed out' + assert time.monotonic() < test_timeout_at, "test timed out" victim_idx = it % len(acceptors) victim = acceptors[victim_idx] victim.stop() - flush_lsn = await pg_conn.fetchval('SELECT pg_current_wal_flush_lsn()') + flush_lsn = await pg_conn.fetchval("SELECT pg_current_wal_flush_lsn()") flush_lsn = lsn_to_hex(flush_lsn) - log.info(f'Postgres flush_lsn {flush_lsn}') + log.info(f"Postgres flush_lsn {flush_lsn}") pageserver_lsn = env.pageserver.http_client().timeline_detail( - uuid.UUID(tenant_id), uuid.UUID((timeline_id)))["local"]["last_record_lsn"] + uuid.UUID(tenant_id), uuid.UUID((timeline_id)) + )["local"]["last_record_lsn"] sk_ps_lag = lsn_from_hex(flush_lsn) - lsn_from_hex(pageserver_lsn) - log.info(f'Pageserver last_record_lsn={pageserver_lsn} lag={sk_ps_lag / 1024}kb') + log.info(f"Pageserver last_record_lsn={pageserver_lsn} lag={sk_ps_lag / 1024}kb") # Wait until alive safekeepers catch up with postgres for idx, safekeeper in enumerate(acceptors): @@ -193,7 +198,7 @@ async def run_restarts_under_load(env: NeonEnv, victim.start() - log.info('Iterations are finished, exiting coroutines...') + log.info("Iterations are finished, exiting coroutines...") stats.running = False # await all workers await asyncio.gather(*workers) @@ -207,10 +212,11 @@ def test_restarts_under_load(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_safekeepers_restarts_under_load') + env.neon_cli.create_branch("test_safekeepers_restarts_under_load") # Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long - pg = env.postgres.create_start('test_safekeepers_restarts_under_load', - config_lines=['max_replication_write_lag=1MB']) + pg = env.postgres.create_start( + "test_safekeepers_restarts_under_load", config_lines=["max_replication_write_lag=1MB"] + ) asyncio.run(run_restarts_under_load(env, pg, env.safekeepers)) @@ -222,15 +228,17 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_restarts_frequent_checkpoints') + env.neon_cli.create_branch("test_restarts_frequent_checkpoints") # Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long - pg = env.postgres.create_start('test_restarts_frequent_checkpoints', - config_lines=[ - 'max_replication_write_lag=1MB', - 'min_wal_size=32MB', - 'max_wal_size=32MB', - 'log_checkpoints=on' - ]) + pg = env.postgres.create_start( + "test_restarts_frequent_checkpoints", + config_lines=[ + "max_replication_write_lag=1MB", + "min_wal_size=32MB", + "max_wal_size=32MB", + "log_checkpoints=on", + ], + ) # we try to simulate large (flush_lsn - truncate_lsn) lag, to test that WAL segments # are not removed before broadcasted to all safekeepers, with the help of replication slot @@ -244,51 +252,51 @@ def postgres_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]): port=env.port_distributor.get_port(), # In these tests compute has high probability of terminating on its own # before our stop() due to lost consensus leadership. - check_stop_result=False) + check_stop_result=False, + ) # embed current time in node name - node_name = pgdir_name or f'pg_node_{time.time()}' - return pg.create_start(branch_name=branch, - node_name=node_name, - config_lines=['log_statement=all']) + node_name = pgdir_name or f"pg_node_{time.time()}" + return pg.create_start( + branch_name=branch, node_name=node_name, config_lines=["log_statement=all"] + ) -async def exec_compute_query(env: NeonEnv, - branch: str, - query: str, - pgdir_name: Optional[str] = None): +async def exec_compute_query( + env: NeonEnv, branch: str, query: str, pgdir_name: Optional[str] = None +): with postgres_create_start(env, branch=branch, pgdir_name=pgdir_name) as pg: before_conn = time.time() conn = await pg.connect_async() res = await conn.fetch(query) await conn.close() after_conn = time.time() - log.info(f'{query} took {after_conn - before_conn}s') + log.info(f"{query} took {after_conn - before_conn}s") return res -async def run_compute_restarts(env: NeonEnv, - queries=16, - batch_insert=10000, - branch='test_compute_restarts'): +async def run_compute_restarts( + env: NeonEnv, queries=16, batch_insert=10000, branch="test_compute_restarts" +): cnt = 0 sum = 0 - await exec_compute_query(env, branch, 'CREATE TABLE t (i int)') + await exec_compute_query(env, branch, "CREATE TABLE t (i int)") for i in range(queries): if i % 4 == 0: await exec_compute_query( - env, branch, f'INSERT INTO t SELECT 1 FROM generate_series(1, {batch_insert})') + env, branch, f"INSERT INTO t SELECT 1 FROM generate_series(1, {batch_insert})" + ) sum += batch_insert cnt += batch_insert elif (i % 4 == 1) or (i % 4 == 3): # Note that select causes lots of FPI's and increases probability of safekeepers # standing at different LSNs after compute termination. - actual_sum = (await exec_compute_query(env, branch, 'SELECT SUM(i) FROM t'))[0][0] - assert actual_sum == sum, f'Expected sum={sum}, actual={actual_sum}' + actual_sum = (await exec_compute_query(env, branch, "SELECT SUM(i) FROM t"))[0][0] + assert actual_sum == sum, f"Expected sum={sum}, actual={actual_sum}" elif i % 4 == 2: - await exec_compute_query(env, branch, 'UPDATE t SET i = i + 1') + await exec_compute_query(env, branch, "UPDATE t SET i = i + 1") sum += cnt @@ -297,7 +305,7 @@ def test_compute_restarts(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_compute_restarts') + env.neon_cli.create_branch("test_compute_restarts") asyncio.run(run_compute_restarts(env)) @@ -315,7 +323,7 @@ class BackgroundCompute(object): async def run(self): if self.running: - raise Exception('BackgroundCompute is already running') + raise Exception("BackgroundCompute is already running") self.running = True i = 0 @@ -327,17 +335,17 @@ class BackgroundCompute(object): res = await exec_compute_query( self.env, self.branch, - f'INSERT INTO query_log(index, verify_key) VALUES ({self.index}, {verify_key}) RETURNING verify_key', - pgdir_name=f'bgcompute{self.index}_key{verify_key}', + f"INSERT INTO query_log(index, verify_key) VALUES ({self.index}, {verify_key}) RETURNING verify_key", + pgdir_name=f"bgcompute{self.index}_key{verify_key}", ) - log.info(f'result: {res}') + log.info(f"result: {res}") if len(res) != 1: - raise Exception('No result returned') + raise Exception("No result returned") if res[0][0] != verify_key: - raise Exception('Wrong result returned') + raise Exception("Wrong result returned") self.successful_queries.append(verify_key) except Exception as e: - log.info(f'BackgroundCompute {self.index} query failed: {e}') + log.info(f"BackgroundCompute {self.index} query failed: {e}") # With less sleep, there is a very big chance of not committing # anything or only 1 xact during test run. @@ -345,14 +353,12 @@ class BackgroundCompute(object): self.running = False -async def run_concurrent_computes(env: NeonEnv, - num_computes=10, - run_seconds=20, - branch='test_concurrent_computes'): +async def run_concurrent_computes( + env: NeonEnv, num_computes=10, run_seconds=20, branch="test_concurrent_computes" +): await exec_compute_query( - env, - branch, - 'CREATE TABLE query_log (t timestamp default now(), index int, verify_key int)') + env, branch, "CREATE TABLE query_log (t timestamp default now(), index int, verify_key int)" + ) computes = [BackgroundCompute(i, env, branch) for i in range(num_computes)] background_tasks = [asyncio.create_task(compute.run()) for compute in computes] @@ -367,13 +373,17 @@ async def run_concurrent_computes(env: NeonEnv, # work for some time with only one compute -- it should be able to make some xacts TIMEOUT_SECONDS = computes[0].MAX_QUERY_GAP_SECONDS + 3 initial_queries_by_0 = len(computes[0].successful_queries) - log.info(f'Waiting for another query by computes[0], ' - f'it already had {initial_queries_by_0}, timeout is {TIMEOUT_SECONDS}s') + log.info( + f"Waiting for another query by computes[0], " + f"it already had {initial_queries_by_0}, timeout is {TIMEOUT_SECONDS}s" + ) for _ in range(10 * TIMEOUT_SECONDS): current_queries_by_0 = len(computes[0].successful_queries) - initial_queries_by_0 if current_queries_by_0 >= 1: - log.info(f'Found {current_queries_by_0} successful queries ' - f'by computes[0], completing the test') + log.info( + f"Found {current_queries_by_0} successful queries " + f"by computes[0], completing the test" + ) break await asyncio.sleep(0.1) else: @@ -382,12 +392,14 @@ async def run_concurrent_computes(env: NeonEnv, await asyncio.gather(background_tasks[0]) - result = await exec_compute_query(env, branch, 'SELECT * FROM query_log') + result = await exec_compute_query(env, branch, "SELECT * FROM query_log") # we should have inserted something while single compute was running - log.info(f'Executed {len(result)} queries, {current_queries_by_0} of them ' - f'by computes[0] after we started stopping the others') + log.info( + f"Executed {len(result)} queries, {current_queries_by_0} of them " + f"by computes[0] after we started stopping the others" + ) for row in result: - log.info(f'{row[0]} {row[1]} {row[2]}') + log.info(f"{row[0]} {row[1]} {row[2]}") # ensure everything reported as committed wasn't lost for compute in computes: @@ -402,16 +414,15 @@ def test_concurrent_computes(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_concurrent_computes') + env.neon_cli.create_branch("test_concurrent_computes") asyncio.run(run_concurrent_computes(env)) # Stop safekeeper and check that query cannot be executed while safekeeper is down. # Query will insert a single row into a table. -async def check_unavailability(sk: Safekeeper, - conn: asyncpg.Connection, - key: int, - start_delay_sec: int = 2): +async def check_unavailability( + sk: Safekeeper, conn: asyncpg.Connection, key: int, start_delay_sec: int = 2 +): # shutdown one of two acceptors, that is, majority sk.stop() @@ -431,7 +442,7 @@ async def run_unavailability(env: NeonEnv, pg: Postgres): conn = await pg.connect_async() # check basic work with table - await conn.execute('CREATE TABLE t(key int primary key, value text)') + await conn.execute("CREATE TABLE t(key int primary key, value text)") await conn.execute("INSERT INTO t values (1, 'payload')") # stop safekeeper and check that query cannot be executed while safekeeper is down @@ -443,7 +454,7 @@ async def run_unavailability(env: NeonEnv, pg: Postgres): # check that we can execute queries after restart await conn.execute("INSERT INTO t values (4, 'payload')") - result_sum = await conn.fetchval('SELECT sum(key) FROM t') + result_sum = await conn.fetchval("SELECT sum(key) FROM t") assert result_sum == 10 @@ -452,8 +463,8 @@ def test_unavailability(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 2 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_safekeepers_unavailability') - pg = env.postgres.create_start('test_safekeepers_unavailability') + env.neon_cli.create_branch("test_safekeepers_unavailability") + pg = env.postgres.create_start("test_safekeepers_unavailability") asyncio.run(run_unavailability(env, pg)) @@ -473,20 +484,20 @@ async def xmas_garland(safekeepers: List[Safekeeper], data: RaceConditionTest): if random.random() >= 0.5: victims.append(sk) log.info( - f'Iteration {data.iteration}: stopping {list(map(lambda sk: sk.id, victims))} safekeepers' + f"Iteration {data.iteration}: stopping {list(map(lambda sk: sk.id, victims))} safekeepers" ) for v in victims: v.stop() await asyncio.sleep(1) for v in victims: v.start() - log.info(f'Iteration {data.iteration} finished') + log.info(f"Iteration {data.iteration} finished") await asyncio.sleep(1) async def run_race_conditions(env: NeonEnv, pg: Postgres): conn = await pg.connect_async() - await conn.execute('CREATE TABLE t(key int primary key, value text)') + await conn.execute("CREATE TABLE t(key int primary key, value text)") data = RaceConditionTest(0, False) bg_xmas = asyncio.create_task(xmas_garland(env.safekeepers, data)) @@ -501,9 +512,9 @@ async def run_race_conditions(env: NeonEnv, pg: Postgres): expected_sum += i i += 1 - log.info(f'Executed {i-1} queries') + log.info(f"Executed {i-1} queries") - res = await conn.fetchval('SELECT sum(key) FROM t') + res = await conn.fetchval("SELECT sum(key) FROM t") assert res == expected_sum data.is_stopped = True @@ -516,8 +527,8 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_safekeepers_race_conditions') - pg = env.postgres.create_start('test_safekeepers_race_conditions') + env.neon_cli.create_branch("test_safekeepers_race_conditions") + pg = env.postgres.create_start("test_safekeepers_race_conditions") asyncio.run(run_race_conditions(env, pg)) @@ -527,13 +538,15 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder): async def run_wal_lagging(env: NeonEnv, pg: Postgres): def safekeepers_guc(env: NeonEnv, active_sk: List[bool]) -> str: # use ports 10, 11 and 12 to simulate unavailable safekeepers - return ','.join([ - f'localhost:{sk.port.pg if active else 10 + i}' - for i, (sk, active) in enumerate(zip(env.safekeepers, active_sk)) - ]) + return ",".join( + [ + f"localhost:{sk.port.pg if active else 10 + i}" + for i, (sk, active) in enumerate(zip(env.safekeepers, active_sk)) + ] + ) conn = await pg.connect_async() - await conn.execute('CREATE TABLE t(key int primary key, value text)') + await conn.execute("CREATE TABLE t(key int primary key, value text)") await conn.close() pg.stop() @@ -552,7 +565,7 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres): continue pg.adjust_for_safekeepers(safekeepers_guc(env, active_sk)) - log.info(f'Iteration {it}: {active_sk}') + log.info(f"Iteration {it}: {active_sk}") pg.start() conn = await pg.connect_async() @@ -569,9 +582,9 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres): pg.start() conn = await pg.connect_async() - log.info(f'Executed {i-1} queries') + log.info(f"Executed {i-1} queries") - res = await conn.fetchval('SELECT sum(key) FROM t') + res = await conn.fetchval("SELECT sum(key) FROM t") assert res == expected_sum @@ -581,7 +594,7 @@ def test_wal_lagging(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_wal_lagging') - pg = env.postgres.create_start('test_wal_lagging') + env.neon_cli.create_branch("test_wal_lagging") + pg = env.postgres.create_start("test_wal_lagging") asyncio.run(run_wal_lagging(env, pg)) diff --git a/test_runner/batch_others/test_wal_restore.py b/test_runner/batch_others/test_wal_restore.py index 809e942415..0847b5a505 100644 --- a/test_runner/batch_others/test_wal_restore.py +++ b/test_runner/batch_others/test_wal_restore.py @@ -1,33 +1,39 @@ import os from pathlib import Path -from fixtures.neon_fixtures import (NeonEnvBuilder, - VanillaPostgres, - PortDistributor, - PgBin, - base_dir, - pg_distrib_dir) +from fixtures.neon_fixtures import ( + NeonEnvBuilder, + PgBin, + PortDistributor, + VanillaPostgres, + base_dir, + pg_distrib_dir, +) -def test_wal_restore(neon_env_builder: NeonEnvBuilder, - pg_bin: PgBin, - test_output_dir: Path, - port_distributor: PortDistributor): +def test_wal_restore( + neon_env_builder: NeonEnvBuilder, + pg_bin: PgBin, + test_output_dir: Path, + port_distributor: PortDistributor, +): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_wal_restore") - pg = env.postgres.create_start('test_wal_restore') + pg = env.postgres.create_start("test_wal_restore") pg.safe_psql("create table t as select generate_series(1,300000)") tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] env.neon_cli.pageserver_stop() port = port_distributor.get_port() - data_dir = test_output_dir / 'pgsql.restored' + data_dir = test_output_dir / "pgsql.restored" with VanillaPostgres(data_dir, PgBin(test_output_dir), port) as restored: - pg_bin.run_capture([ - os.path.join(base_dir, 'libs/utils/scripts/restore_from_wal.sh'), - os.path.join(pg_distrib_dir, 'bin'), - str(test_output_dir / 'repo' / 'safekeepers' / 'sk1' / str(tenant_id) / '*'), - str(data_dir), - str(port) - ]) + pg_bin.run_capture( + [ + os.path.join(base_dir, "libs/utils/scripts/restore_from_wal.sh"), + os.path.join(pg_distrib_dir, "bin"), + str(test_output_dir / "repo" / "safekeepers" / "sk1" / str(tenant_id) / "*"), + str(data_dir), + str(port), + ] + ) restored.start() - assert restored.safe_psql('select count(*) from t', user='cloud_admin') == [(300000, )] + assert restored.safe_psql("select count(*) from t", user="cloud_admin") == [(300000,)] diff --git a/test_runner/batch_pg_regress/test_isolation.py b/test_runner/batch_pg_regress/test_isolation.py index 0124459440..7127a069b0 100644 --- a/test_runner/batch_pg_regress/test_isolation.py +++ b/test_runner/batch_pg_regress/test_isolation.py @@ -1,5 +1,6 @@ import os from pathlib import Path + import pytest from fixtures.neon_fixtures import NeonEnv, base_dir, pg_distrib_dir @@ -13,33 +14,33 @@ def test_isolation(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, caps env.neon_cli.create_branch("test_isolation", "empty") # Connect to postgres and create a database called "regression". # isolation tests use prepared transactions, so enable them - pg = env.postgres.create_start('test_isolation', config_lines=['max_prepared_transactions=100']) - pg.safe_psql('CREATE DATABASE isolation_regression') + pg = env.postgres.create_start("test_isolation", config_lines=["max_prepared_transactions=100"]) + pg.safe_psql("CREATE DATABASE isolation_regression") # Create some local directories for pg_isolation_regress to run in. - runpath = test_output_dir / 'regress' - (runpath / 'testtablespace').mkdir(parents=True) + runpath = test_output_dir / "regress" + (runpath / "testtablespace").mkdir(parents=True) # Compute all the file locations that pg_isolation_regress will need. - build_path = os.path.join(pg_distrib_dir, 'build/src/test/isolation') - src_path = os.path.join(base_dir, 'vendor/postgres/src/test/isolation') - bindir = os.path.join(pg_distrib_dir, 'bin') - schedule = os.path.join(src_path, 'isolation_schedule') - pg_isolation_regress = os.path.join(build_path, 'pg_isolation_regress') + build_path = os.path.join(pg_distrib_dir, "build/src/test/isolation") + src_path = os.path.join(base_dir, "vendor/postgres/src/test/isolation") + bindir = os.path.join(pg_distrib_dir, "bin") + schedule = os.path.join(src_path, "isolation_schedule") + pg_isolation_regress = os.path.join(build_path, "pg_isolation_regress") pg_isolation_regress_command = [ pg_isolation_regress, - '--use-existing', - '--bindir={}'.format(bindir), - '--dlpath={}'.format(build_path), - '--inputdir={}'.format(src_path), - '--schedule={}'.format(schedule), + "--use-existing", + "--bindir={}".format(bindir), + "--dlpath={}".format(build_path), + "--inputdir={}".format(src_path), + "--schedule={}".format(schedule), ] env_vars = { - 'PGPORT': str(pg.default_options['port']), - 'PGUSER': pg.default_options['user'], - 'PGHOST': pg.default_options['host'], + "PGPORT": str(pg.default_options["port"]), + "PGUSER": pg.default_options["user"], + "PGHOST": pg.default_options["host"], } # Run the command. diff --git a/test_runner/batch_pg_regress/test_neon_regress.py b/test_runner/batch_pg_regress/test_neon_regress.py index 66ea67d9f1..5f13e6b2de 100644 --- a/test_runner/batch_pg_regress/test_neon_regress.py +++ b/test_runner/batch_pg_regress/test_neon_regress.py @@ -1,11 +1,8 @@ import os from pathlib import Path -from fixtures.neon_fixtures import (NeonEnv, - check_restored_datadir_content, - base_dir, - pg_distrib_dir) from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, base_dir, check_restored_datadir_content, pg_distrib_dir def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, capsys): @@ -13,35 +10,35 @@ def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, c env.neon_cli.create_branch("test_neon_regress", "empty") # Connect to postgres and create a database called "regression". - pg = env.postgres.create_start('test_neon_regress') - pg.safe_psql('CREATE DATABASE regression') + pg = env.postgres.create_start("test_neon_regress") + pg.safe_psql("CREATE DATABASE regression") # Create some local directories for pg_regress to run in. - runpath = test_output_dir / 'regress' - (runpath / 'testtablespace').mkdir(parents=True) + runpath = test_output_dir / "regress" + (runpath / "testtablespace").mkdir(parents=True) # Compute all the file locations that pg_regress will need. # This test runs neon specific tests - build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress') - src_path = os.path.join(base_dir, 'test_runner/neon_regress') - bindir = os.path.join(pg_distrib_dir, 'bin') - schedule = os.path.join(src_path, 'parallel_schedule') - pg_regress = os.path.join(build_path, 'pg_regress') + build_path = os.path.join(pg_distrib_dir, "build/src/test/regress") + src_path = os.path.join(base_dir, "test_runner/neon_regress") + bindir = os.path.join(pg_distrib_dir, "bin") + schedule = os.path.join(src_path, "parallel_schedule") + pg_regress = os.path.join(build_path, "pg_regress") pg_regress_command = [ pg_regress, - '--use-existing', - '--bindir={}'.format(bindir), - '--dlpath={}'.format(build_path), - '--schedule={}'.format(schedule), - '--inputdir={}'.format(src_path), + "--use-existing", + "--bindir={}".format(bindir), + "--dlpath={}".format(build_path), + "--schedule={}".format(schedule), + "--inputdir={}".format(src_path), ] log.info(pg_regress_command) env_vars = { - 'PGPORT': str(pg.default_options['port']), - 'PGUSER': pg.default_options['user'], - 'PGHOST': pg.default_options['host'], + "PGPORT": str(pg.default_options["port"]), + "PGUSER": pg.default_options["user"], + "PGHOST": pg.default_options["host"], } # Run the command. @@ -51,8 +48,8 @@ def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, c pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) # checkpoint one more time to ensure that the lsn we get is the latest one - pg.safe_psql('CHECKPOINT') - lsn = pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0] + pg.safe_psql("CHECKPOINT") + lsn = pg.safe_psql("select pg_current_wal_insert_lsn()")[0][0] # Check that we restore the content of the datadir correctly check_restored_datadir_content(test_output_dir, env, pg) diff --git a/test_runner/batch_pg_regress/test_pg_regress.py b/test_runner/batch_pg_regress/test_pg_regress.py index 28066d7a32..478dbf0a91 100644 --- a/test_runner/batch_pg_regress/test_pg_regress.py +++ b/test_runner/batch_pg_regress/test_pg_regress.py @@ -1,7 +1,8 @@ import os import pathlib + import pytest -from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content, base_dir, pg_distrib_dir +from fixtures.neon_fixtures import NeonEnv, base_dir, check_restored_datadir_content, pg_distrib_dir # The pg_regress tests run for a long time, especially in debug mode, @@ -12,34 +13,34 @@ def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_ env.neon_cli.create_branch("test_pg_regress", "empty") # Connect to postgres and create a database called "regression". - pg = env.postgres.create_start('test_pg_regress') - pg.safe_psql('CREATE DATABASE regression') + pg = env.postgres.create_start("test_pg_regress") + pg.safe_psql("CREATE DATABASE regression") # Create some local directories for pg_regress to run in. - runpath = test_output_dir / 'regress' - (runpath / 'testtablespace').mkdir(parents=True) + runpath = test_output_dir / "regress" + (runpath / "testtablespace").mkdir(parents=True) # Compute all the file locations that pg_regress will need. - build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress') - src_path = os.path.join(base_dir, 'vendor/postgres/src/test/regress') - bindir = os.path.join(pg_distrib_dir, 'bin') - schedule = os.path.join(src_path, 'parallel_schedule') - pg_regress = os.path.join(build_path, 'pg_regress') + build_path = os.path.join(pg_distrib_dir, "build/src/test/regress") + src_path = os.path.join(base_dir, "vendor/postgres/src/test/regress") + bindir = os.path.join(pg_distrib_dir, "bin") + schedule = os.path.join(src_path, "parallel_schedule") + pg_regress = os.path.join(build_path, "pg_regress") pg_regress_command = [ pg_regress, '--bindir=""', - '--use-existing', - '--bindir={}'.format(bindir), - '--dlpath={}'.format(build_path), - '--schedule={}'.format(schedule), - '--inputdir={}'.format(src_path), + "--use-existing", + "--bindir={}".format(bindir), + "--dlpath={}".format(build_path), + "--schedule={}".format(schedule), + "--inputdir={}".format(src_path), ] env_vars = { - 'PGPORT': str(pg.default_options['port']), - 'PGUSER': pg.default_options['user'], - 'PGHOST': pg.default_options['host'], + "PGPORT": str(pg.default_options["port"]), + "PGUSER": pg.default_options["user"], + "PGHOST": pg.default_options["host"], } # Run the command. @@ -49,7 +50,7 @@ def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_ pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) # checkpoint one more time to ensure that the lsn we get is the latest one - pg.safe_psql('CHECKPOINT') + pg.safe_psql("CHECKPOINT") # Check that we restore the content of the datadir correctly check_restored_datadir_content(test_output_dir, env, pg) diff --git a/test_runner/conftest.py b/test_runner/conftest.py index 51545d0217..8b7f6a2eea 100644 --- a/test_runner/conftest.py +++ b/test_runner/conftest.py @@ -1,5 +1,7 @@ -pytest_plugins = ("fixtures.neon_fixtures", - "fixtures.benchmark_fixture", - "fixtures.pg_stats", - "fixtures.compare_fixtures", - "fixtures.slow") +pytest_plugins = ( + "fixtures.neon_fixtures", + "fixtures.benchmark_fixture", + "fixtures.pg_stats", + "fixtures.compare_fixtures", + "fixtures.slow", +) diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py index cca4f7ce17..cec46f9f6d 100644 --- a/test_runner/fixtures/benchmark_fixture.py +++ b/test_runner/fixtures/benchmark_fixture.py @@ -10,12 +10,14 @@ import warnings from contextlib import contextmanager from datetime import datetime from pathlib import Path + # Type-related stuff from typing import Iterator, Optional import pytest from _pytest.config import Config from _pytest.terminal import TerminalReporter + """ This file contains fixtures for micro-benchmarks. @@ -112,8 +114,10 @@ class PgBenchRunResult: # pgbench v14: # initial connection time = 3.858 ms # tps = 309.281539 (without initial connection time) - if (line.startswith("tps = ") and ("(excluding connections establishing)" in line - or "(without initial connection time)")): + if line.startswith("tps = ") and ( + "(excluding connections establishing)" in line + or "(without initial connection time)" + ): tps = float(line.split()[2]) return cls( @@ -154,17 +158,21 @@ class PgBenchInitResult: last_line = stderr.splitlines()[-1] - regex = re.compile(r"done in (\d+\.\d+) s " - r"\(" - r"(?:drop tables (\d+\.\d+) s)?(?:, )?" - r"(?:create tables (\d+\.\d+) s)?(?:, )?" - r"(?:client-side generate (\d+\.\d+) s)?(?:, )?" - r"(?:vacuum (\d+\.\d+) s)?(?:, )?" - r"(?:primary keys (\d+\.\d+) s)?(?:, )?" - r"\)\.") + regex = re.compile( + r"done in (\d+\.\d+) s " + r"\(" + r"(?:drop tables (\d+\.\d+) s)?(?:, )?" + r"(?:create tables (\d+\.\d+) s)?(?:, )?" + r"(?:client-side generate (\d+\.\d+) s)?(?:, )?" + r"(?:vacuum (\d+\.\d+) s)?(?:, )?" + r"(?:primary keys (\d+\.\d+) s)?(?:, )?" + r"\)\." + ) if (m := regex.match(last_line)) is not None: - total, drop_tables, create_tables, client_side_generate, vacuum, primary_keys = [float(v) for v in m.groups() if v is not None] + total, drop_tables, create_tables, client_side_generate, vacuum, primary_keys = [ + float(v) for v in m.groups() if v is not None + ] else: raise RuntimeError(f"can't parse pgbench initialize results from `{last_line}`") @@ -185,11 +193,11 @@ class PgBenchInitResult: class MetricReport(str, enum.Enum): # str is a hack to make it json serializable # this means that this is a constant test parameter # like number of transactions, or number of clients - TEST_PARAM = 'test_param' + TEST_PARAM = "test_param" # reporter can use it to mark test runs with higher values as improvements - HIGHER_IS_BETTER = 'higher_is_better' + HIGHER_IS_BETTER = "higher_is_better" # the same but for lower values - LOWER_IS_BETTER = 'lower_is_better' + LOWER_IS_BETTER = "lower_is_better" class NeonBenchmarker: @@ -197,6 +205,7 @@ class NeonBenchmarker: An object for recording benchmark results. This is created for each test function by the zenbenchmark fixture """ + def __init__(self, property_recorder): # property recorder here is a pytest fixture provided by junitxml module # https://docs.pytest.org/en/6.2.x/reference.html#pytest.junitxml.record_property @@ -244,43 +253,57 @@ class NeonBenchmarker: ) def record_pg_bench_result(self, prefix: str, pg_bench_result: PgBenchRunResult): - self.record(f"{prefix}.number_of_clients", - pg_bench_result.number_of_clients, - '', - MetricReport.TEST_PARAM) - self.record(f"{prefix}.number_of_threads", - pg_bench_result.number_of_threads, - '', - MetricReport.TEST_PARAM) + self.record( + f"{prefix}.number_of_clients", + pg_bench_result.number_of_clients, + "", + MetricReport.TEST_PARAM, + ) + self.record( + f"{prefix}.number_of_threads", + pg_bench_result.number_of_threads, + "", + MetricReport.TEST_PARAM, + ) self.record( f"{prefix}.number_of_transactions_actually_processed", pg_bench_result.number_of_transactions_actually_processed, - '', + "", # that's because this is predefined by test matrix and doesn't change across runs report=MetricReport.TEST_PARAM, ) - self.record(f"{prefix}.latency_average", - pg_bench_result.latency_average, - unit="ms", - report=MetricReport.LOWER_IS_BETTER) + self.record( + f"{prefix}.latency_average", + pg_bench_result.latency_average, + unit="ms", + report=MetricReport.LOWER_IS_BETTER, + ) if pg_bench_result.latency_stddev is not None: - self.record(f"{prefix}.latency_stddev", - pg_bench_result.latency_stddev, - unit="ms", - report=MetricReport.LOWER_IS_BETTER) - self.record(f"{prefix}.tps", pg_bench_result.tps, '', report=MetricReport.HIGHER_IS_BETTER) - self.record(f"{prefix}.run_duration", - pg_bench_result.run_duration, - unit="s", - report=MetricReport.LOWER_IS_BETTER) - self.record(f"{prefix}.run_start_timestamp", - pg_bench_result.run_start_timestamp, - '', - MetricReport.TEST_PARAM) - self.record(f"{prefix}.run_end_timestamp", - pg_bench_result.run_end_timestamp, - '', - MetricReport.TEST_PARAM) + self.record( + f"{prefix}.latency_stddev", + pg_bench_result.latency_stddev, + unit="ms", + report=MetricReport.LOWER_IS_BETTER, + ) + self.record(f"{prefix}.tps", pg_bench_result.tps, "", report=MetricReport.HIGHER_IS_BETTER) + self.record( + f"{prefix}.run_duration", + pg_bench_result.run_duration, + unit="s", + report=MetricReport.LOWER_IS_BETTER, + ) + self.record( + f"{prefix}.run_start_timestamp", + pg_bench_result.run_start_timestamp, + "", + MetricReport.TEST_PARAM, + ) + self.record( + f"{prefix}.run_end_timestamp", + pg_bench_result.run_end_timestamp, + "", + MetricReport.TEST_PARAM, + ) def record_pg_bench_init_result(self, prefix: str, result: PgBenchInitResult): test_params = [ @@ -288,10 +311,9 @@ class NeonBenchmarker: "end_timestamp", ] for test_param in test_params: - self.record(f"{prefix}.{test_param}", - getattr(result, test_param), - '', - MetricReport.TEST_PARAM) + self.record( + f"{prefix}.{test_param}", getattr(result, test_param), "", MetricReport.TEST_PARAM + ) metrics = [ "duration", @@ -303,10 +325,9 @@ class NeonBenchmarker: ] for metric in metrics: if (value := getattr(result, metric)) is not None: - self.record(f"{prefix}.{metric}", - value, - unit="s", - report=MetricReport.LOWER_IS_BETTER) + self.record( + f"{prefix}.{metric}", value, unit="s", report=MetricReport.LOWER_IS_BETTER + ) def get_io_writes(self, pageserver) -> int: """ @@ -319,7 +340,7 @@ class NeonBenchmarker: """ Fetch the "maxrss" metric from the pageserver """ - metric_name = r'libmetrics_maxrss_kb' + metric_name = r"libmetrics_maxrss_kb" return self.get_int_counter_value(pageserver, metric_name) def get_int_counter_value(self, pageserver, metric_name) -> int: @@ -332,7 +353,7 @@ class NeonBenchmarker: # all prometheus metrics are floats. So to be pedantic, read it as a float # and round to integer. all_metrics = pageserver.http_client().get_metrics() - matches = re.search(fr'^{metric_name} (\S+)$', all_metrics, re.MULTILINE) + matches = re.search(rf"^{metric_name} (\S+)$", all_metrics, re.MULTILINE) assert matches return int(round(float(matches.group(1)))) @@ -358,10 +379,12 @@ class NeonBenchmarker: yield after = self.get_io_writes(pageserver) - self.record(metric_name, - round((after - before) / (1024 * 1024)), - "MB", - report=MetricReport.LOWER_IS_BETTER) + self.record( + metric_name, + round((after - before) / (1024 * 1024)), + "MB", + report=MetricReport.LOWER_IS_BETTER, + ) @pytest.fixture(scope="function") @@ -410,8 +433,9 @@ def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int, result_entry = [] for _, recorded_property in test_report.user_properties: - terminalreporter.write("{}.{}: ".format(test_report.head_line, - recorded_property["name"])) + terminalreporter.write( + "{}.{}: ".format(test_report.head_line, recorded_property["name"]) + ) unit = recorded_property["unit"] value = recorded_property["value"] if unit == "MB": @@ -426,11 +450,13 @@ def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int, result_entry.append(recorded_property) - result.append({ - "suit": test_report.nodeid, - "total_duration": test_report.duration, - "data": result_entry, - }) + result.append( + { + "suit": test_report.nodeid, + "total_duration": test_report.duration, + "data": result_entry, + } + ) out_dir = config.getoption("out_dir") if out_dir is None: @@ -442,6 +468,5 @@ def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int, return get_out_path(Path(out_dir), revision=revision).write_text( - json.dumps({ - "revision": revision, "platform": platform, "result": result - }, indent=4)) + json.dumps({"revision": revision, "platform": platform, "result": result}, indent=4) + ) diff --git a/test_runner/fixtures/compare_fixtures.py b/test_runner/fixtures/compare_fixtures.py index e6c3a79697..6bca5be335 100644 --- a/test_runner/fixtures/compare_fixtures.py +++ b/test_runner/fixtures/compare_fixtures.py @@ -1,14 +1,14 @@ -import pytest -from contextlib import contextmanager from abc import ABC, abstractmethod -from fixtures.pg_stats import PgStatTable - -from fixtures.neon_fixtures import PgBin, PgProtocol, VanillaPostgres, RemotePostgres, NeonEnv -from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from contextlib import contextmanager # Type-related stuff from typing import Dict, List +import pytest +from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from fixtures.neon_fixtures import NeonEnv, PgBin, PgProtocol, RemotePostgres, VanillaPostgres +from fixtures.pg_stats import PgStatTable + class PgCompare(ABC): """Common interface of all postgres implementations, useful for benchmarks. @@ -16,6 +16,7 @@ class PgCompare(ABC): This class is a helper class for the neon_with_baseline fixture. See its documentation for more details. """ + @property @abstractmethod def pg(self) -> PgProtocol: @@ -61,7 +62,7 @@ class PgCompare(ABC): data = self._retrieve_pg_stats(pg_stats) for k in set(init_data) & set(data): - self.zenbenchmark.record(k, data[k] - init_data[k], '', MetricReport.HIGHER_IS_BETTER) + self.zenbenchmark.record(k, data[k] - init_data[k], "", MetricReport.HIGHER_IS_BETTER) def _retrieve_pg_stats(self, pg_stats: List[PgStatTable]) -> Dict[str, int]: results: Dict[str, int] = {} @@ -81,17 +82,16 @@ class PgCompare(ABC): class NeonCompare(PgCompare): """PgCompare interface for the neon stack.""" - def __init__(self, - zenbenchmark: NeonBenchmarker, - neon_simple_env: NeonEnv, - pg_bin: PgBin, - branch_name): + + def __init__( + self, zenbenchmark: NeonBenchmarker, neon_simple_env: NeonEnv, pg_bin: PgBin, branch_name + ): self.env = neon_simple_env self._zenbenchmark = zenbenchmark self._pg_bin = pg_bin # We only use one branch and one timeline - self.env.neon_cli.create_branch(branch_name, 'empty') + self.env.neon_cli.create_branch(branch_name, "empty") self._pg = self.env.postgres.create_start(branch_name) self.timeline = self.pg.safe_psql("SHOW neon.timeline_id")[0][0] @@ -118,32 +118,33 @@ class NeonCompare(PgCompare): self.pscur.execute(f"compact {self.env.initial_tenant.hex} {self.timeline}") def report_peak_memory_use(self) -> None: - self.zenbenchmark.record("peak_mem", - self.zenbenchmark.get_peak_mem(self.env.pageserver) / 1024, - 'MB', - report=MetricReport.LOWER_IS_BETTER) + self.zenbenchmark.record( + "peak_mem", + self.zenbenchmark.get_peak_mem(self.env.pageserver) / 1024, + "MB", + report=MetricReport.LOWER_IS_BETTER, + ) def report_size(self) -> None: - timeline_size = self.zenbenchmark.get_timeline_size(self.env.repo_dir, - self.env.initial_tenant, - self.timeline) - self.zenbenchmark.record('size', - timeline_size / (1024 * 1024), - 'MB', - report=MetricReport.LOWER_IS_BETTER) + timeline_size = self.zenbenchmark.get_timeline_size( + self.env.repo_dir, self.env.initial_tenant, self.timeline + ) + self.zenbenchmark.record( + "size", timeline_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER + ) total_files = self.zenbenchmark.get_int_counter_value( - self.env.pageserver, "pageserver_created_persistent_files_total") + self.env.pageserver, "pageserver_created_persistent_files_total" + ) total_bytes = self.zenbenchmark.get_int_counter_value( - self.env.pageserver, "pageserver_written_persistent_bytes_total") - self.zenbenchmark.record("data_uploaded", - total_bytes / (1024 * 1024), - "MB", - report=MetricReport.LOWER_IS_BETTER) - self.zenbenchmark.record("num_files_uploaded", - total_files, - "", - report=MetricReport.LOWER_IS_BETTER) + self.env.pageserver, "pageserver_written_persistent_bytes_total" + ) + self.zenbenchmark.record( + "data_uploaded", total_bytes / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER + ) + self.zenbenchmark.record( + "num_files_uploaded", total_files, "", report=MetricReport.LOWER_IS_BETTER + ) def record_pageserver_writes(self, out_name): return self.zenbenchmark.record_pageserver_writes(self.env.pageserver, out_name) @@ -154,13 +155,16 @@ class NeonCompare(PgCompare): class VanillaCompare(PgCompare): """PgCompare interface for vanilla postgres.""" + def __init__(self, zenbenchmark, vanilla_pg: VanillaPostgres): self._pg = vanilla_pg self._zenbenchmark = zenbenchmark - vanilla_pg.configure([ - 'shared_buffers=1MB', - 'synchronous_commit=off', - ]) + vanilla_pg.configure( + [ + "shared_buffers=1MB", + "synchronous_commit=off", + ] + ) vanilla_pg.start() # Long-lived cursor, useful for flushing @@ -186,16 +190,14 @@ class VanillaCompare(PgCompare): pass # TODO find something def report_size(self) -> None: - data_size = self.pg.get_subdir_size('base') - self.zenbenchmark.record('data_size', - data_size / (1024 * 1024), - 'MB', - report=MetricReport.LOWER_IS_BETTER) - wal_size = self.pg.get_subdir_size('pg_wal') - self.zenbenchmark.record('wal_size', - wal_size / (1024 * 1024), - 'MB', - report=MetricReport.LOWER_IS_BETTER) + data_size = self.pg.get_subdir_size("base") + self.zenbenchmark.record( + "data_size", data_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER + ) + wal_size = self.pg.get_subdir_size("pg_wal") + self.zenbenchmark.record( + "wal_size", wal_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER + ) @contextmanager def record_pageserver_writes(self, out_name): @@ -207,6 +209,7 @@ class VanillaCompare(PgCompare): class RemoteCompare(PgCompare): """PgCompare interface for a remote postgres instance.""" + def __init__(self, zenbenchmark, remote_pg: RemotePostgres): self._pg = remote_pg self._zenbenchmark = zenbenchmark @@ -247,18 +250,18 @@ class RemoteCompare(PgCompare): return self.zenbenchmark.record_duration(out_name) -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def neon_compare(request, zenbenchmark, pg_bin, neon_simple_env) -> NeonCompare: branch_name = request.node.name return NeonCompare(zenbenchmark, neon_simple_env, pg_bin, branch_name) -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def vanilla_compare(zenbenchmark, vanilla_pg) -> VanillaCompare: return VanillaCompare(zenbenchmark, vanilla_pg) -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def remote_compare(zenbenchmark, remote_pg) -> RemoteCompare: return RemoteCompare(zenbenchmark, remote_pg) diff --git a/test_runner/fixtures/log_helper.py b/test_runner/fixtures/log_helper.py index 7c2d83d4e3..17f2402391 100644 --- a/test_runner/fixtures/log_helper.py +++ b/test_runner/fixtures/log_helper.py @@ -1,5 +1,6 @@ import logging import logging.config + """ This file configures logging to use in python tests. Logs are automatically captured and shown in their @@ -22,20 +23,16 @@ https://docs.pytest.org/en/6.2.x/logging.html LOGGING = { "version": 1, "loggers": { - "root": { - "level": "INFO" - }, - "root.safekeeper_async": { - "level": "INFO" # a lot of logs on DEBUG level - } - } + "root": {"level": "INFO"}, + "root.safekeeper_async": {"level": "INFO"}, # a lot of logs on DEBUG level + }, } -def getLogger(name='root') -> logging.Logger: +def getLogger(name="root") -> logging.Logger: """Method to get logger for tests. - Should be used to get correctly initialized logger. """ + Should be used to get correctly initialized logger.""" return logging.getLogger(name) diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 6fc62c6ea9..6159e273c0 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -1,10 +1,10 @@ -from dataclasses import dataclass -from prometheus_client.parser import text_string_to_metric_families -from prometheus_client.samples import Sample -from typing import Dict, List from collections import defaultdict +from dataclasses import dataclass +from typing import Dict, List from fixtures.log_helper import log +from prometheus_client.parser import text_string_to_metric_families +from prometheus_client.samples import Sample class Metrics: diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 4483355c4c..388cc34182 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1,47 +1,45 @@ from __future__ import annotations -from dataclasses import field -from contextlib import contextmanager -from enum import Flag, auto -import enum -import textwrap -from cached_property import cached_property import abc -import asyncpg -import os -import boto3 -import pathlib -import uuid -import warnings -import jwt +import enum +import filecmp import json -import psycopg2 -import pytest +import os +import pathlib import re import shutil import socket import subprocess -import time -import filecmp -import tempfile import tarfile - -from contextlib import closing +import tempfile +import textwrap +import time +import uuid +import warnings +from contextlib import closing, contextmanager +from dataclasses import dataclass, field +from enum import Flag, auto from pathlib import Path -from dataclasses import dataclass +from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypeVar, Union, cast + +import allure # type: ignore +import asyncpg +import backoff # type: ignore +import boto3 +import jwt +import psycopg2 +import pytest +import requests +from cached_property import cached_property +from fixtures.log_helper import log # Type-related stuff from psycopg2.extensions import connection as PgConnection from psycopg2.extensions import make_dsn, parse_dsn -from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple from typing_extensions import Literal -import allure # type: ignore -import requests -import backoff # type: ignore +from .utils import etcd_path, get_self_dir, lsn_from_hex, lsn_to_hex, subprocess_capture -from .utils import (etcd_path, get_self_dir, subprocess_capture, lsn_from_hex, lsn_to_hex) -from fixtures.log_helper import log """ This file contains pytest fixtures. A fixture is a test resource that can be summoned by placing its name in the test's arguments. @@ -60,11 +58,11 @@ put directly-importable functions into utils.py or another separate file. """ Env = Dict[str, str] -Fn = TypeVar('Fn', bound=Callable[..., Any]) +Fn = TypeVar("Fn", bound=Callable[..., Any]) -DEFAULT_OUTPUT_DIR = 'test_output' -DEFAULT_POSTGRES_DIR = 'tmp_install' -DEFAULT_BRANCH_NAME = 'main' +DEFAULT_OUTPUT_DIR = "test_output" +DEFAULT_POSTGRES_DIR = "tmp_install" +DEFAULT_BRANCH_NAME = "main" BASE_PORT = 15000 WORKER_PORT_NUM = 1000 @@ -92,7 +90,7 @@ def check_interferring_processes(config): return # does not use -c as it is not supported on macOS - cmd = ['pgrep', 'pageserver|postgres|safekeeper'] + cmd = ["pgrep", "pageserver|postgres|safekeeper"] result = subprocess.run(cmd, stdout=subprocess.DEVNULL) if result.returncode == 0: # returncode of 0 means it found something. @@ -100,7 +98,7 @@ def check_interferring_processes(config): # result of the test. # NOTE this shows as an internal pytest error, there might be a better way raise Exception( - 'Found interfering processes running. Stop all Neon pageservers, nodes, safekeepers, as well as stand-alone Postgres.' + "Found interfering processes running. Stop all Neon pageservers, nodes, safekeepers, as well as stand-alone Postgres." ) @@ -111,18 +109,20 @@ def pytest_configure(config): """ check_interferring_processes(config) - numprocesses = config.getoption('numprocesses') - if numprocesses is not None and BASE_PORT + numprocesses * WORKER_PORT_NUM > 32768: # do not use ephemeral ports - raise Exception('Too many workers configured. Cannot distribute ports for services.') + numprocesses = config.getoption("numprocesses") + if ( + numprocesses is not None and BASE_PORT + numprocesses * WORKER_PORT_NUM > 32768 + ): # do not use ephemeral ports + raise Exception("Too many workers configured. Cannot distribute ports for services.") # find the base directory (currently this is the git root) global base_dir - base_dir = os.path.normpath(os.path.join(get_self_dir(), '../..')) - log.info(f'base_dir is {base_dir}') + base_dir = os.path.normpath(os.path.join(get_self_dir(), "../..")) + log.info(f"base_dir is {base_dir}") # Compute the top-level directory for all tests. global top_output_dir - env_test_output = os.environ.get('TEST_OUTPUT') + env_test_output = os.environ.get("TEST_OUTPUT") if env_test_output is not None: top_output_dir = env_test_output else: @@ -131,18 +131,18 @@ def pytest_configure(config): # Find the postgres installation. global pg_distrib_dir - env_postgres_bin = os.environ.get('POSTGRES_DISTRIB_DIR') + env_postgres_bin = os.environ.get("POSTGRES_DISTRIB_DIR") if env_postgres_bin: pg_distrib_dir = env_postgres_bin else: pg_distrib_dir = os.path.normpath(os.path.join(base_dir, DEFAULT_POSTGRES_DIR)) - log.info(f'pg_distrib_dir is {pg_distrib_dir}') + log.info(f"pg_distrib_dir is {pg_distrib_dir}") if os.getenv("REMOTE_ENV"): # When testing against a remote server, we only need the client binary. - if not os.path.exists(os.path.join(pg_distrib_dir, 'bin/psql')): + if not os.path.exists(os.path.join(pg_distrib_dir, "bin/psql")): raise Exception('psql not found at "{}"'.format(pg_distrib_dir)) else: - if not os.path.exists(os.path.join(pg_distrib_dir, 'bin/postgres')): + if not os.path.exists(os.path.join(pg_distrib_dir, "bin/postgres")): raise Exception('postgres not found at "{}"'.format(pg_distrib_dir)) if os.getenv("REMOTE_ENV"): @@ -151,25 +151,26 @@ def pytest_configure(config): return # Find the neon binaries. global neon_binpath - env_neon_bin = os.environ.get('NEON_BIN') + env_neon_bin = os.environ.get("NEON_BIN") if env_neon_bin: neon_binpath = env_neon_bin else: - neon_binpath = os.path.join(base_dir, 'target/debug') - log.info(f'neon_binpath is {neon_binpath}') - if not os.path.exists(os.path.join(neon_binpath, 'pageserver')): + neon_binpath = os.path.join(base_dir, "target/debug") + log.info(f"neon_binpath is {neon_binpath}") + if not os.path.exists(os.path.join(neon_binpath, "pageserver")): raise Exception('neon binaries not found at "{}"'.format(neon_binpath)) def profiling_supported(): - """Return True if the pageserver was compiled with the 'profiling' feature - """ - bin_pageserver = os.path.join(str(neon_binpath), 'pageserver') - res = subprocess.run([bin_pageserver, '--version'], - check=True, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + """Return True if the pageserver was compiled with the 'profiling' feature""" + bin_pageserver = os.path.join(str(neon_binpath), "pageserver") + res = subprocess.run( + [bin_pageserver, "--version"], + check=True, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) return "profiling:true" in res.stdout @@ -181,21 +182,21 @@ def shareable_scope(fixture_name, config) -> Literal["session", "function"]: def myfixture(...) ... """ - return 'function' if os.environ.get('TEST_SHARED_FIXTURES') is None else 'session' + return "function" if os.environ.get("TEST_SHARED_FIXTURES") is None else "session" -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def worker_seq_no(worker_id: str): # worker_id is a pytest-xdist fixture # it can be master or gw # parse it to always get a number - if worker_id == 'master': + if worker_id == "master": return 0 - assert worker_id.startswith('gw') + assert worker_id.startswith("gw") return int(worker_id[2:]) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def worker_base_port(worker_seq_no: int): # so we divide ports in ranges of 100 ports # so workers have disjoint set of ports for services @@ -247,15 +248,16 @@ class PortDistributor: return port else: raise RuntimeError( - 'port range configured for test is exhausted, consider enlarging the range') + "port range configured for test is exhausted, consider enlarging the range" + ) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def port_distributor(worker_base_port): return PortDistributor(base_port=worker_base_port, port_number=WORKER_PORT_NUM) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def default_broker(request: Any, port_distributor: PortDistributor): client_port = port_distributor.get_port() # multiple pytest sessions could get launched in parallel, get them different datadirs @@ -267,12 +269,12 @@ def default_broker(request: Any, port_distributor: PortDistributor): broker.stop() -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def run_id(): yield uuid.uuid4() -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def mock_s3_server(port_distributor: PortDistributor): mock_s3_server = MockS3Server(port_distributor.get_port()) yield mock_s3_server @@ -280,7 +282,8 @@ def mock_s3_server(port_distributor: PortDistributor): class PgProtocol: - """ Reusable connection logic """ + """Reusable connection logic""" + def __init__(self, **kwargs): self.default_options = kwargs @@ -292,18 +295,18 @@ class PgProtocol: def conn_options(self, **kwargs): result = self.default_options.copy() - if 'dsn' in kwargs: - result.update(parse_dsn(kwargs['dsn'])) + if "dsn" in kwargs: + result.update(parse_dsn(kwargs["dsn"])) result.update(kwargs) # Individual statement timeout in seconds. 2 minutes should be # enough for our tests, but if you need a longer, you can # change it by calling "SET statement_timeout" after # connecting. - options = result.get('options', '') + options = result.get("options", "") if "statement_timeout" not in options: - options = f'-cstatement_timeout=120s {options}' - result['options'] = options + options = f"-cstatement_timeout=120s {options}" + result["options"] = options return result # autocommit=True here by default because that's what we need most of the time @@ -339,19 +342,19 @@ class PgProtocol: # The psycopg2 option 'dbname' is called 'database' is asyncpg conn_options = self.conn_options(**kwargs) - if 'dbname' in conn_options: - conn_options['database'] = conn_options.pop('dbname') + if "dbname" in conn_options: + conn_options["database"] = conn_options.pop("dbname") # Convert options='-c=' to server_settings - if 'options' in conn_options: - options = conn_options.pop('options') - for match in re.finditer(r'-c(\w*)=(\w*)', options): + if "options" in conn_options: + options = conn_options.pop("options") + for match in re.finditer(r"-c(\w*)=(\w*)", options): key = match.group(1) val = match.group(2) - if 'server_options' in conn_options: - conn_options['server_settings'].update({key: val}) + if "server_options" in conn_options: + conn_options["server_settings"].update({key: val}) else: - conn_options['server_settings'] = {key: val} + conn_options["server_settings"] = {key: val} return await asyncpg.connect(**conn_options) def safe_psql(self, query: str, **kwargs: Any) -> List[Tuple[Any, ...]]: @@ -397,11 +400,9 @@ class AuthKeys: return token def generate_tenant_token(self, tenant_id): - token = jwt.encode({ - "scope": "tenant", "tenant_id": tenant_id - }, - self.priv, - algorithm="RS256") + token = jwt.encode( + {"scope": "tenant", "tenant_id": tenant_id}, self.priv, algorithm="RS256" + ) if isinstance(token, bytes): token = token.decode() @@ -416,6 +417,7 @@ class MockS3Server: Also provides a set of methods to derive the connection properties from and the method to kill the underlying server. """ + def __init__( self, port: int, @@ -425,7 +427,7 @@ class MockS3Server: # XXX: do not use `shell=True` or add `exec ` to the command here otherwise. # We use `self.subprocess.kill()` to shut down the server, which would not "just" work in Linux # if a process is started from the shell process. - self.subprocess = subprocess.Popen(['poetry', 'run', 'moto_server', 's3', f'-p{port}']) + self.subprocess = subprocess.Popen(["poetry", "run", "moto_server", "s3", f"-p{port}"]) error = None try: return_code = self.subprocess.poll() @@ -442,13 +444,13 @@ class MockS3Server: return f"http://127.0.0.1:{self.port}" def region(self) -> str: - return 'us-east-1' + return "us-east-1" def access_key(self) -> str: - return 'test' + return "test" def secret_key(self) -> str: - return 'test' + return "test" def kill(self): self.subprocess.kill() @@ -487,8 +489,8 @@ class S3Storage: def access_env_vars(self) -> Dict[str, str]: return { - 'AWS_ACCESS_KEY_ID': self.access_key, - 'AWS_SECRET_ACCESS_KEY': self.secret_key, + "AWS_ACCESS_KEY_ID": self.access_key, + "AWS_SECRET_ACCESS_KEY": self.secret_key, } @@ -528,6 +530,7 @@ class NeonEnvBuilder: created in the right directory, based on the test name, and it's properly cleaned up after the test has finished. """ + def __init__( self, repo_dir: Path, @@ -592,7 +595,7 @@ class NeonEnvBuilder: elif remote_storage_kind == RemoteStorageKind.REAL_S3: self.enable_real_s3_remote_storage(test_name=test_name, force_enable=force_enable) else: - raise RuntimeError(f'Unknown storage type: {remote_storage_kind}') + raise RuntimeError(f"Unknown storage type: {remote_storage_kind}") def enable_local_fs_remote_storage(self, force_enable=True): """ @@ -600,7 +603,7 @@ class NeonEnvBuilder: Errors, if the pageserver has some remote storage configuration already, unless `force_enable` is not set to `True`. """ assert force_enable or self.remote_storage is None, "remote storage is enabled already" - self.remote_storage = LocalFsStorage(Path(self.repo_dir / 'local_fs_remote_storage')) + self.remote_storage = LocalFsStorage(Path(self.repo_dir / "local_fs_remote_storage")) def enable_mock_s3_remote_storage(self, bucket_name: str, force_enable=True): """ @@ -613,7 +616,7 @@ class NeonEnvBuilder: mock_region = self.mock_s3_server.region() self.remote_storage_client = boto3.client( - 's3', + "s3", endpoint_url=mock_endpoint, region_name=mock_region, aws_access_key_id=self.mock_s3_server.access_key(), @@ -652,20 +655,22 @@ class NeonEnvBuilder: self.keep_remote_storage_contents = False # construct a prefix inside bucket for the particular test case and test run - self.remote_storage_prefix = f'{self.run_id}/{test_name}' + self.remote_storage_prefix = f"{self.run_id}/{test_name}" self.remote_storage_client = boto3.client( - 's3', + "s3", region_name=region, aws_access_key_id=access_key, aws_secret_access_key=secret_key, aws_session_token=session_token, ) - self.remote_storage = S3Storage(bucket_name=bucket_name, - bucket_region=region, - access_key=access_key, - secret_key=secret_key, - prefix_in_bucket=self.remote_storage_prefix) + self.remote_storage = S3Storage( + bucket_name=bucket_name, + bucket_region=region, + access_key=access_key, + secret_key=secret_key, + prefix_in_bucket=self.remote_storage_prefix, + ) def cleanup_remote_storage(self): # here wee check for true remote storage, no the local one @@ -678,26 +683,28 @@ class NeonEnvBuilder: log.info("keep_remote_storage_contents skipping remote storage cleanup") return - log.info("removing data from test s3 bucket %s by prefix %s", - self.remote_storage.bucket_name, - self.remote_storage_prefix) - paginator = self.remote_storage_client.get_paginator('list_objects_v2') + log.info( + "removing data from test s3 bucket %s by prefix %s", + self.remote_storage.bucket_name, + self.remote_storage_prefix, + ) + paginator = self.remote_storage_client.get_paginator("list_objects_v2") pages = paginator.paginate( Bucket=self.remote_storage.bucket_name, Prefix=self.remote_storage_prefix, ) - objects_to_delete = {'Objects': []} + objects_to_delete = {"Objects": []} cnt = 0 - for item in pages.search('Contents'): + for item in pages.search("Contents"): # weirdly when nothing is found it returns [None] if item is None: break - objects_to_delete['Objects'].append({'Key': item['Key']}) + objects_to_delete["Objects"].append({"Key": item["Key"]}) # flush once aws limit reached - if len(objects_to_delete['Objects']) >= 1000: + if len(objects_to_delete["Objects"]) >= 1000: self.remote_storage_client.delete_objects( Bucket=self.remote_storage.bucket_name, Delete=objects_to_delete, @@ -706,9 +713,10 @@ class NeonEnvBuilder: cnt += 1 # flush rest - if len(objects_to_delete['Objects']): - self.remote_storage_client.delete_objects(Bucket=self.remote_storage.bucket_name, - Delete=objects_to_delete) + if len(objects_to_delete["Objects"]): + self.remote_storage_client.delete_objects( + Bucket=self.remote_storage.bucket_name, Delete=objects_to_delete + ) log.info("deleted %s objects from remote storage", cnt) @@ -718,7 +726,7 @@ class NeonEnvBuilder: def __exit__(self, exc_type, exc_value, traceback): # Stop all the nodes. if self.env: - log.info('Cleaning up all storage and compute nodes') + log.info("Cleaning up all storage and compute nodes") self.env.postgres.stop_all() for sk in self.env.safekeepers: sk.stop(immediate=True) @@ -759,6 +767,7 @@ class NeonEnv: create_tenant() - initializes a new tenant in the page server, returns the tenant id """ + def __init__(self, config: NeonEnvBuilder): self.repo_dir = config.repo_dir self.rust_log_override = config.rust_log_override @@ -776,15 +785,19 @@ class NeonEnv: self.initial_tenant = uuid.uuid4() # Create a config file corresponding to the options - toml = textwrap.dedent(f""" + toml = textwrap.dedent( + f""" default_tenant_id = '{self.initial_tenant.hex}' - """) + """ + ) - toml += textwrap.dedent(f""" + toml += textwrap.dedent( + f""" [etcd_broker] broker_endpoints = ['{self.broker.client_url()}'] etcd_binary_path = '{self.broker.binary_path}' - """) + """ + ) # Create config for pageserver pageserver_port = PageserverPort( @@ -793,18 +806,20 @@ class NeonEnv: ) pageserver_auth_type = "ZenithJWT" if config.auth_enabled else "Trust" - toml += textwrap.dedent(f""" + toml += textwrap.dedent( + f""" [pageserver] id=1 listen_pg_addr = 'localhost:{pageserver_port.pg}' listen_http_addr = 'localhost:{pageserver_port.http}' auth_type = '{pageserver_auth_type}' - """) + """ + ) # Create a corresponding NeonPageserver object - self.pageserver = NeonPageserver(self, - port=pageserver_port, - config_override=config.pageserver_config_override) + self.pageserver = NeonPageserver( + self, port=pageserver_port, config_override=config.pageserver_config_override + ) # Create config and a Safekeeper object for each safekeeper for i in range(1, config.num_safekeepers + 1): @@ -813,21 +828,29 @@ class NeonEnv: http=self.port_distributor.get_port(), ) id = config.safekeepers_id_start + i # assign ids sequentially - toml += textwrap.dedent(f""" + toml += textwrap.dedent( + f""" [[safekeepers]] id = {id} pg_port = {port.pg} http_port = {port.http} - sync = {'true' if config.safekeepers_enable_fsync else 'false'}""") + sync = {'true' if config.safekeepers_enable_fsync else 'false'}""" + ) if config.auth_enabled: - toml += textwrap.dedent(f""" + toml += textwrap.dedent( + f""" auth_enabled = true - """) - if bool(self.remote_storage_users - & RemoteStorageUsers.SAFEKEEPER) and self.remote_storage is not None: - toml += textwrap.dedent(f""" + """ + ) + if ( + bool(self.remote_storage_users & RemoteStorageUsers.SAFEKEEPER) + and self.remote_storage is not None + ): + toml += textwrap.dedent( + f""" remote_storage = "{remote_storage_to_toml_inline_table(self.remote_storage)}" - """) + """ + ) safekeeper = Safekeeper(env=self, id=id, port=port) self.safekeepers.append(safekeeper) @@ -843,8 +866,8 @@ class NeonEnv: safekeeper.start() def get_safekeeper_connstrs(self) -> str: - """ Get list of safekeeper endpoints suitable for safekeepers GUC """ - return ','.join([f'localhost:{wa.port.pg}' for wa in self.safekeepers]) + """Get list of safekeeper endpoints suitable for safekeepers GUC""" + return ",".join([f"localhost:{wa.port.pg}" for wa in self.safekeepers]) def timeline_dir(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID) -> Path: """Get a timeline directory's path based on the repo directory of the test environment""" @@ -852,8 +875,8 @@ class NeonEnv: @cached_property def auth_keys(self) -> AuthKeys: - pub = (Path(self.repo_dir) / 'auth_public_key.pem').read_bytes() - priv = (Path(self.repo_dir) / 'auth_private_key.pem').read_bytes() + pub = (Path(self.repo_dir) / "auth_public_key.pem").read_bytes() + priv = (Path(self.repo_dir) / "auth_private_key.pem").read_bytes() return AuthKeys(pub=pub, priv=priv) @@ -866,11 +889,11 @@ def _shared_simple_env( run_id: uuid.UUID, ) -> Iterator[NeonEnv]: """ - # Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES - is set, this is shared by all tests using `neon_simple_env`. + # Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES + is set, this is shared by all tests using `neon_simple_env`. """ - if os.environ.get('TEST_SHARED_FIXTURES') is None: + if os.environ.get("TEST_SHARED_FIXTURES") is None: # Create the environment in the per-test output directory repo_dir = os.path.join(get_test_output_dir(request), "repo") else: @@ -879,21 +902,21 @@ def _shared_simple_env( shutil.rmtree(repo_dir, ignore_errors=True) with NeonEnvBuilder( - repo_dir=Path(repo_dir), - port_distributor=port_distributor, - broker=default_broker, - mock_s3_server=mock_s3_server, - run_id=run_id, + repo_dir=Path(repo_dir), + port_distributor=port_distributor, + broker=default_broker, + mock_s3_server=mock_s3_server, + run_id=run_id, ) as builder: env = builder.init_start() # For convenience in tests, create a branch from the freshly-initialized cluster. - env.neon_cli.create_branch('empty', ancestor_branch_name=DEFAULT_BRANCH_NAME) + env.neon_cli.create_branch("empty", ancestor_branch_name=DEFAULT_BRANCH_NAME) yield env -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]: """ Simple Neon environment, with no authentication and no safekeepers. @@ -908,7 +931,7 @@ def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]: _shared_simple_env.postgres.stop_all() -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def neon_env_builder( test_output_dir, port_distributor: PortDistributor, @@ -934,11 +957,11 @@ def neon_env_builder( # Return the builder to the caller with NeonEnvBuilder( - repo_dir=Path(repo_dir), - port_distributor=port_distributor, - mock_s3_server=mock_s3_server, - broker=default_broker, - run_id=run_id, + repo_dir=Path(repo_dir), + port_distributor=port_distributor, + mock_s3_server=mock_s3_server, + broker=default_broker, + run_id=run_id, ) as builder: yield builder @@ -954,16 +977,16 @@ class NeonPageserverHttpClient(requests.Session): self.auth_token = auth_token if auth_token is not None: - self.headers['Authorization'] = f'Bearer {auth_token}' + self.headers["Authorization"] = f"Bearer {auth_token}" def verbose_error(self, res: requests.Response): try: res.raise_for_status() except requests.RequestException as e: try: - msg = res.json()['msg'] + msg = res.json()["msg"] except: - msg = '' + msg = "" raise NeonPageserverApiException(msg) from e def check_status(self): @@ -980,12 +1003,12 @@ class NeonPageserverHttpClient(requests.Session): res = self.post( f"http://localhost:{self.port}/v1/tenant", json={ - 'new_tenant_id': new_tenant_id.hex if new_tenant_id else None, + "new_tenant_id": new_tenant_id.hex if new_tenant_id else None, }, ) self.verbose_error(res) if res.status_code == 409: - raise Exception(f'could not create tenant: already exists for id {new_tenant_id}') + raise Exception(f"could not create tenant: already exists for id {new_tenant_id}") new_tenant_id = res.json() assert isinstance(new_tenant_id, str) return uuid.UUID(new_tenant_id) @@ -1019,28 +1042,29 @@ class NeonPageserverHttpClient(requests.Session): ancestor_timeline_id: Optional[uuid.UUID] = None, ancestor_start_lsn: Optional[str] = None, ) -> Dict[Any, Any]: - res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline", - json={ - 'new_timeline_id': - new_timeline_id.hex if new_timeline_id else None, - 'ancestor_start_lsn': - ancestor_start_lsn, - 'ancestor_timeline_id': - ancestor_timeline_id.hex if ancestor_timeline_id else None, - }) + res = self.post( + f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline", + json={ + "new_timeline_id": new_timeline_id.hex if new_timeline_id else None, + "ancestor_start_lsn": ancestor_start_lsn, + "ancestor_timeline_id": ancestor_timeline_id.hex if ancestor_timeline_id else None, + }, + ) self.verbose_error(res) if res.status_code == 409: - raise Exception(f'could not create timeline: already exists for id {new_timeline_id}') + raise Exception(f"could not create timeline: already exists for id {new_timeline_id}") res_json = res.json() assert isinstance(res_json, dict) return res_json - def timeline_detail(self, - tenant_id: uuid.UUID, - timeline_id: uuid.UUID, - include_non_incremental_logical_size: bool = False, - include_non_incremental_physical_size: bool = False) -> Dict[Any, Any]: + def timeline_detail( + self, + tenant_id: uuid.UUID, + timeline_id: uuid.UUID, + include_non_incremental_logical_size: bool = False, + include_non_incremental_physical_size: bool = False, + ) -> Dict[Any, Any]: include_non_incremental_logical_size_str = "0" if include_non_incremental_logical_size: @@ -1051,9 +1075,10 @@ class NeonPageserverHttpClient(requests.Session): include_non_incremental_physical_size_str = "1" res = self.get( - f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}" + - "?include-non-incremental-logical-size={include_non_incremental_logical_size_str}" + - "&include-non-incremental-physical-size={include_non_incremental_physical_size_str}") + f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}" + + "?include-non-incremental-logical-size={include_non_incremental_logical_size_str}" + + "&include-non-incremental-physical-size={include_non_incremental_physical_size_str}" + ) self.verbose_error(res) res_json = res.json() assert isinstance(res_json, dict) @@ -1061,7 +1086,8 @@ class NeonPageserverHttpClient(requests.Session): def timeline_delete(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID): res = self.delete( - f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}") + f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}" + ) self.verbose_error(res) res_json = res.json() assert res_json is None @@ -1079,12 +1105,15 @@ class PageserverPort: http: int -CREATE_TIMELINE_ID_EXTRACTOR = re.compile(r"^Created timeline '(?P[^']+)'", - re.MULTILINE) -CREATE_TIMELINE_ID_EXTRACTOR = re.compile(r"^Created timeline '(?P[^']+)'", - re.MULTILINE) -TIMELINE_DATA_EXTRACTOR = re.compile(r"\s(?P[^\s]+)\s\[(?P[^\]]+)\]", - re.MULTILINE) +CREATE_TIMELINE_ID_EXTRACTOR = re.compile( + r"^Created timeline '(?P[^']+)'", re.MULTILINE +) +CREATE_TIMELINE_ID_EXTRACTOR = re.compile( + r"^Created timeline '(?P[^']+)'", re.MULTILINE +) +TIMELINE_DATA_EXTRACTOR = re.compile( + r"\s(?P[^\s]+)\s\[(?P[^\]]+)\]", re.MULTILINE +) class AbstractNeonCli(abc.ABC): @@ -1093,15 +1122,18 @@ class AbstractNeonCli(abc.ABC): Supports a way to run arbitrary command directly via CLI. Do not use directly, use specific subclasses instead. """ + def __init__(self, env: NeonEnv): self.env = env COMMAND: str = cast(str, None) # To be overwritten by the derived class. - def raw_cli(self, - arguments: List[str], - extra_env_vars: Optional[Dict[str, str]] = None, - check_return_code=True) -> 'subprocess.CompletedProcess[str]': + def raw_cli( + self, + arguments: List[str], + extra_env_vars: Optional[Dict[str, str]] = None, + check_return_code=True, + ) -> "subprocess.CompletedProcess[str]": """ Run the command with the specified arguments. @@ -1122,30 +1154,32 @@ class AbstractNeonCli(abc.ABC): bin_neon = os.path.join(str(neon_binpath), self.COMMAND) args = [bin_neon] + arguments - log.info('Running command "{}"'.format(' '.join(args))) + log.info('Running command "{}"'.format(" ".join(args))) log.info(f'Running in "{self.env.repo_dir}"') env_vars = os.environ.copy() - env_vars['NEON_REPO_DIR'] = str(self.env.repo_dir) - env_vars['POSTGRES_DISTRIB_DIR'] = str(pg_distrib_dir) + env_vars["NEON_REPO_DIR"] = str(self.env.repo_dir) + env_vars["POSTGRES_DISTRIB_DIR"] = str(pg_distrib_dir) if self.env.rust_log_override is not None: - env_vars['RUST_LOG'] = self.env.rust_log_override + env_vars["RUST_LOG"] = self.env.rust_log_override for (extra_env_key, extra_env_value) in (extra_env_vars or {}).items(): env_vars[extra_env_key] = extra_env_value # Pass coverage settings - var = 'LLVM_PROFILE_FILE' + var = "LLVM_PROFILE_FILE" val = os.environ.get(var) if val: env_vars[var] = val # Intercept CalledProcessError and print more info - res = subprocess.run(args, - env=env_vars, - check=False, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + res = subprocess.run( + args, + env=env_vars, + check=False, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) if not res.returncode: log.info(f"Run success: {res.stdout}") elif check_return_code: @@ -1156,10 +1190,9 @@ class AbstractNeonCli(abc.ABC): stderr: {res.stderr} """ log.info(msg) - raise Exception(msg) from subprocess.CalledProcessError(res.returncode, - res.args, - res.stdout, - res.stderr) + raise Exception(msg) from subprocess.CalledProcessError( + res.returncode, res.args, res.stdout, res.stderr + ) return res @@ -1169,12 +1202,14 @@ class NeonCli(AbstractNeonCli): Supports main commands via typed methods and a way to run arbitrary command directly via CLI. """ - COMMAND = 'neon_local' + COMMAND = "neon_local" - def create_tenant(self, - tenant_id: Optional[uuid.UUID] = None, - timeline_id: Optional[uuid.UUID] = None, - conf: Optional[Dict[str, str]] = None) -> Tuple[uuid.UUID, uuid.UUID]: + def create_tenant( + self, + tenant_id: Optional[uuid.UUID] = None, + timeline_id: Optional[uuid.UUID] = None, + conf: Optional[Dict[str, str]] = None, + ) -> Tuple[uuid.UUID, uuid.UUID]: """ Creates a new tenant, returns its id and its initial timeline's id. """ @@ -1183,13 +1218,14 @@ class NeonCli(AbstractNeonCli): if timeline_id is None: timeline_id = uuid.uuid4() if conf is None: - res = self.raw_cli([ - 'tenant', 'create', '--tenant-id', tenant_id.hex, '--timeline-id', timeline_id.hex - ]) + res = self.raw_cli( + ["tenant", "create", "--tenant-id", tenant_id.hex, "--timeline-id", timeline_id.hex] + ) else: - res = self.raw_cli([ - 'tenant', 'create', '--tenant-id', tenant_id.hex, '--timeline-id', timeline_id.hex - ] + sum(list(map(lambda kv: (['-c', kv[0] + ':' + kv[1]]), conf.items())), [])) + res = self.raw_cli( + ["tenant", "create", "--tenant-id", tenant_id.hex, "--timeline-id", timeline_id.hex] + + sum(list(map(lambda kv: (["-c", kv[0] + ":" + kv[1]]), conf.items())), []) + ) res.check_returncode() return tenant_id, timeline_id @@ -1198,27 +1234,28 @@ class NeonCli(AbstractNeonCli): Update tenant config. """ if conf is None: - res = self.raw_cli(['tenant', 'config', '--tenant-id', tenant_id.hex]) + res = self.raw_cli(["tenant", "config", "--tenant-id", tenant_id.hex]) else: res = self.raw_cli( - ['tenant', 'config', '--tenant-id', tenant_id.hex] + - sum(list(map(lambda kv: (['-c', kv[0] + ':' + kv[1]]), conf.items())), [])) + ["tenant", "config", "--tenant-id", tenant_id.hex] + + sum(list(map(lambda kv: (["-c", kv[0] + ":" + kv[1]]), conf.items())), []) + ) res.check_returncode() - def list_tenants(self) -> 'subprocess.CompletedProcess[str]': - res = self.raw_cli(['tenant', 'list']) + def list_tenants(self) -> "subprocess.CompletedProcess[str]": + res = self.raw_cli(["tenant", "list"]) res.check_returncode() return res - def create_timeline(self, - new_branch_name: str, - tenant_id: Optional[uuid.UUID] = None) -> uuid.UUID: + def create_timeline( + self, new_branch_name: str, tenant_id: Optional[uuid.UUID] = None + ) -> uuid.UUID: cmd = [ - 'timeline', - 'create', - '--branch-name', + "timeline", + "create", + "--branch-name", new_branch_name, - '--tenant-id', + "--tenant-id", (tenant_id or self.env.initial_tenant).hex, ] @@ -1229,17 +1266,17 @@ class NeonCli(AbstractNeonCli): created_timeline_id = None if matches is not None: - created_timeline_id = matches.group('timeline_id') + created_timeline_id = matches.group("timeline_id") return uuid.UUID(created_timeline_id) def create_root_branch(self, branch_name: str, tenant_id: Optional[uuid.UUID] = None): cmd = [ - 'timeline', - 'create', - '--branch-name', + "timeline", + "create", + "--branch-name", branch_name, - '--tenant-id', + "--tenant-id", (tenant_id or self.env.initial_tenant).hex, ] @@ -1250,30 +1287,32 @@ class NeonCli(AbstractNeonCli): created_timeline_id = None if matches is not None: - created_timeline_id = matches.group('timeline_id') + created_timeline_id = matches.group("timeline_id") if created_timeline_id is None: - raise Exception('could not find timeline id after `neon timeline create` invocation') + raise Exception("could not find timeline id after `neon timeline create` invocation") else: return uuid.UUID(created_timeline_id) - def create_branch(self, - new_branch_name: str = DEFAULT_BRANCH_NAME, - ancestor_branch_name: Optional[str] = None, - tenant_id: Optional[uuid.UUID] = None, - ancestor_start_lsn: Optional[str] = None) -> uuid.UUID: + def create_branch( + self, + new_branch_name: str = DEFAULT_BRANCH_NAME, + ancestor_branch_name: Optional[str] = None, + tenant_id: Optional[uuid.UUID] = None, + ancestor_start_lsn: Optional[str] = None, + ) -> uuid.UUID: cmd = [ - 'timeline', - 'branch', - '--branch-name', + "timeline", + "branch", + "--branch-name", new_branch_name, - '--tenant-id', + "--tenant-id", (tenant_id or self.env.initial_tenant).hex, ] if ancestor_branch_name is not None: - cmd.extend(['--ancestor-branch-name', ancestor_branch_name]) + cmd.extend(["--ancestor-branch-name", ancestor_branch_name]) if ancestor_start_lsn is not None: - cmd.extend(['--ancestor-start-lsn', ancestor_start_lsn]) + cmd.extend(["--ancestor-start-lsn", ancestor_start_lsn]) res = self.raw_cli(cmd) res.check_returncode() @@ -1282,10 +1321,10 @@ class NeonCli(AbstractNeonCli): created_timeline_id = None if matches is not None: - created_timeline_id = matches.group('timeline_id') + created_timeline_id = matches.group("timeline_id") if created_timeline_id is None: - raise Exception('could not find timeline id after `neon timeline create` invocation') + raise Exception("could not find timeline id after `neon timeline create` invocation") else: return uuid.UUID(created_timeline_id) @@ -1297,52 +1336,60 @@ class NeonCli(AbstractNeonCli): # (L) main [b49f7954224a0ad25cc0013ea107b54b] # (L) ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540] res = self.raw_cli( - ['timeline', 'list', '--tenant-id', (tenant_id or self.env.initial_tenant).hex]) + ["timeline", "list", "--tenant-id", (tenant_id or self.env.initial_tenant).hex] + ) timelines_cli = sorted( - map(lambda branch_and_id: (branch_and_id[0], branch_and_id[1]), - TIMELINE_DATA_EXTRACTOR.findall(res.stdout))) + map( + lambda branch_and_id: (branch_and_id[0], branch_and_id[1]), + TIMELINE_DATA_EXTRACTOR.findall(res.stdout), + ) + ) return timelines_cli - def init(self, - config_toml: str, - initial_timeline_id: Optional[uuid.UUID] = None) -> 'subprocess.CompletedProcess[str]': - with tempfile.NamedTemporaryFile(mode='w+') as tmp: + def init( + self, config_toml: str, initial_timeline_id: Optional[uuid.UUID] = None + ) -> "subprocess.CompletedProcess[str]": + with tempfile.NamedTemporaryFile(mode="w+") as tmp: tmp.write(config_toml) tmp.flush() - cmd = ['init', f'--config={tmp.name}'] + cmd = ["init", f"--config={tmp.name}"] if initial_timeline_id: - cmd.extend(['--timeline-id', initial_timeline_id.hex]) + cmd.extend(["--timeline-id", initial_timeline_id.hex]) append_pageserver_param_overrides( params_to_update=cmd, remote_storage=self.env.remote_storage, remote_storage_users=self.env.remote_storage_users, - pageserver_config_override=self.env.pageserver.config_override) + pageserver_config_override=self.env.pageserver.config_override, + ) res = self.raw_cli(cmd) res.check_returncode() return res def pageserver_enabled_features(self) -> Any: - bin_pageserver = os.path.join(str(neon_binpath), 'pageserver') - args = [bin_pageserver, '--enabled-features'] - log.info('Running command "{}"'.format(' '.join(args))) + bin_pageserver = os.path.join(str(neon_binpath), "pageserver") + args = [bin_pageserver, "--enabled-features"] + log.info('Running command "{}"'.format(" ".join(args))) - res = subprocess.run(args, - check=True, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + res = subprocess.run( + args, + check=True, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) log.info(f"pageserver_enabled_features success: {res.stdout}") return json.loads(res.stdout) - def pageserver_start(self, overrides=()) -> 'subprocess.CompletedProcess[str]': - start_args = ['pageserver', 'start', *overrides] + def pageserver_start(self, overrides=()) -> "subprocess.CompletedProcess[str]": + start_args = ["pageserver", "start", *overrides] append_pageserver_param_overrides( params_to_update=start_args, remote_storage=self.env.remote_storage, remote_storage_users=self.env.remote_storage_users, - pageserver_config_override=self.env.pageserver.config_override) + pageserver_config_override=self.env.pageserver.config_override, + ) s3_env_vars = None if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage): @@ -1350,29 +1397,29 @@ class NeonCli(AbstractNeonCli): return self.raw_cli(start_args, extra_env_vars=s3_env_vars) - def pageserver_stop(self, immediate=False) -> 'subprocess.CompletedProcess[str]': - cmd = ['pageserver', 'stop'] + def pageserver_stop(self, immediate=False) -> "subprocess.CompletedProcess[str]": + cmd = ["pageserver", "stop"] if immediate: - cmd.extend(['-m', 'immediate']) + cmd.extend(["-m", "immediate"]) log.info(f"Stopping pageserver with {cmd}") return self.raw_cli(cmd) - def safekeeper_start(self, id: int) -> 'subprocess.CompletedProcess[str]': + def safekeeper_start(self, id: int) -> "subprocess.CompletedProcess[str]": s3_env_vars = None if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage): s3_env_vars = self.env.remote_storage.access_env_vars() - return self.raw_cli(['safekeeper', 'start', str(id)], extra_env_vars=s3_env_vars) + return self.raw_cli(["safekeeper", "start", str(id)], extra_env_vars=s3_env_vars) - def safekeeper_stop(self, - id: Optional[int] = None, - immediate=False) -> 'subprocess.CompletedProcess[str]': - args = ['safekeeper', 'stop'] + def safekeeper_stop( + self, id: Optional[int] = None, immediate=False + ) -> "subprocess.CompletedProcess[str]": + args = ["safekeeper", "stop"] if id is not None: args.append(str(id)) if immediate: - args.extend(['-m', 'immediate']) + args.extend(["-m", "immediate"]) return self.raw_cli(args) def pg_create( @@ -1382,19 +1429,19 @@ class NeonCli(AbstractNeonCli): tenant_id: Optional[uuid.UUID] = None, lsn: Optional[str] = None, port: Optional[int] = None, - ) -> 'subprocess.CompletedProcess[str]': + ) -> "subprocess.CompletedProcess[str]": args = [ - 'pg', - 'create', - '--tenant-id', + "pg", + "create", + "--tenant-id", (tenant_id or self.env.initial_tenant).hex, - '--branch-name', + "--branch-name", branch_name, ] if lsn is not None: - args.extend(['--lsn', lsn]) + args.extend(["--lsn", lsn]) if port is not None: - args.extend(['--port', str(port)]) + args.extend(["--port", str(port)]) if node_name is not None: args.append(node_name) @@ -1408,17 +1455,17 @@ class NeonCli(AbstractNeonCli): tenant_id: Optional[uuid.UUID] = None, lsn: Optional[str] = None, port: Optional[int] = None, - ) -> 'subprocess.CompletedProcess[str]': + ) -> "subprocess.CompletedProcess[str]": args = [ - 'pg', - 'start', - '--tenant-id', + "pg", + "start", + "--tenant-id", (tenant_id or self.env.initial_tenant).hex, ] if lsn is not None: - args.append(f'--lsn={lsn}') + args.append(f"--lsn={lsn}") if port is not None: - args.append(f'--port={port}') + args.append(f"--port={port}") if node_name is not None: args.append(node_name) @@ -1432,15 +1479,15 @@ class NeonCli(AbstractNeonCli): tenant_id: Optional[uuid.UUID] = None, destroy=False, check_return_code=True, - ) -> 'subprocess.CompletedProcess[str]': + ) -> "subprocess.CompletedProcess[str]": args = [ - 'pg', - 'stop', - '--tenant-id', + "pg", + "stop", + "--tenant-id", (tenant_id or self.env.initial_tenant).hex, ] if destroy: - args.append('--destroy') + args.append("--destroy") if node_name is not None: args.append(node_name) @@ -1453,12 +1500,12 @@ class WalCraft(AbstractNeonCli): Supports main commands via typed methods and a way to run arbitrary command directly via CLI. """ - COMMAND = 'wal_craft' + COMMAND = "wal_craft" def postgres_config(self) -> List[str]: res = self.raw_cli(["print-postgres-config"]) res.check_returncode() - return res.stdout.split('\n') + return res.stdout.split("\n") def in_existing(self, type: str, connection: str) -> None: res = self.raw_cli(["in-existing", type, connection]) @@ -1471,14 +1518,15 @@ class NeonPageserver(PgProtocol): Initializes the repository via `neon init`. """ + def __init__(self, env: NeonEnv, port: PageserverPort, config_override: Optional[str] = None): - super().__init__(host='localhost', port=port.pg, user='cloud_admin') + super().__init__(host="localhost", port=port.pg, user="cloud_admin") self.env = env self.running = False self.service_port = port self.config_override = config_override - def start(self, overrides=()) -> 'NeonPageserver': + def start(self, overrides=()) -> "NeonPageserver": """ Start the page server. `overrides` allows to add some config to this pageserver start. @@ -1490,7 +1538,7 @@ class NeonPageserver(PgProtocol): self.running = True return self - def stop(self, immediate=False) -> 'NeonPageserver': + def stop(self, immediate=False) -> "NeonPageserver": """ Stop the page server. Returns self. @@ -1523,31 +1571,33 @@ def append_pageserver_param_overrides( remote_storage_toml_table = remote_storage_to_toml_inline_table(remote_storage) params_to_update.append( - f'--pageserver-config-override=remote_storage={remote_storage_toml_table}') + f"--pageserver-config-override=remote_storage={remote_storage_toml_table}" + ) - env_overrides = os.getenv('ZENITH_PAGESERVER_OVERRIDES') + env_overrides = os.getenv("ZENITH_PAGESERVER_OVERRIDES") if env_overrides is not None: params_to_update += [ - f'--pageserver-config-override={o.strip()}' for o in env_overrides.split(';') + f"--pageserver-config-override={o.strip()}" for o in env_overrides.split(";") ] if pageserver_config_override is not None: params_to_update += [ - f'--pageserver-config-override={o.strip()}' - for o in pageserver_config_override.split(';') + f"--pageserver-config-override={o.strip()}" + for o in pageserver_config_override.split(";") ] class PgBin: - """ A helper class for executing postgres binaries """ + """A helper class for executing postgres binaries""" + def __init__(self, log_dir: Path): self.log_dir = log_dir - self.pg_bin_path = os.path.join(str(pg_distrib_dir), 'bin') + self.pg_bin_path = os.path.join(str(pg_distrib_dir), "bin") self.env = os.environ.copy() - self.env['LD_LIBRARY_PATH'] = os.path.join(str(pg_distrib_dir), 'lib') + self.env["LD_LIBRARY_PATH"] = os.path.join(str(pg_distrib_dir), "lib") def _fixpath(self, command: List[str]): - if '/' not in command[0]: + if "/" not in command[0]: command[0] = os.path.join(self.pg_bin_path, command[0]) def _build_env(self, env_add: Optional[Env]) -> Env: @@ -1572,15 +1622,17 @@ class PgBin: """ self._fixpath(command) - log.info('Running command "{}"'.format(' '.join(command))) + log.info('Running command "{}"'.format(" ".join(command))) env = self._build_env(env) subprocess.run(command, env=env, cwd=cwd, check=True) - def run_capture(self, - command: List[str], - env: Optional[Env] = None, - cwd: Optional[str] = None, - **kwargs: Any) -> str: + def run_capture( + self, + command: List[str], + env: Optional[Env] = None, + cwd: Optional[str] = None, + **kwargs: Any, + ) -> str: """ Run one of the postgres binaries, with stderr and stdout redirected to a file. @@ -1589,35 +1641,32 @@ class PgBin: """ self._fixpath(command) - log.info('Running command "{}"'.format(' '.join(command))) + log.info('Running command "{}"'.format(" ".join(command))) env = self._build_env(env) - return subprocess_capture(str(self.log_dir), - command, - env=env, - cwd=cwd, - check=True, - **kwargs) + return subprocess_capture( + str(self.log_dir), command, env=env, cwd=cwd, check=True, **kwargs + ) -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def pg_bin(test_output_dir: Path) -> PgBin: return PgBin(test_output_dir) class VanillaPostgres(PgProtocol): def __init__(self, pgdatadir: Path, pg_bin: PgBin, port: int, init=True): - super().__init__(host='localhost', port=port, dbname='postgres') + super().__init__(host="localhost", port=port, dbname="postgres") self.pgdatadir = pgdatadir self.pg_bin = pg_bin self.running = False if init: - self.pg_bin.run_capture(['initdb', '-D', str(pgdatadir)]) + self.pg_bin.run_capture(["initdb", "-D", str(pgdatadir)]) self.configure([f"port = {port}\n"]) def configure(self, options: List[str]): """Append lines into postgresql.conf file.""" assert not self.running - with open(os.path.join(self.pgdatadir, 'postgresql.conf'), 'a') as conf_file: + with open(os.path.join(self.pgdatadir, "postgresql.conf"), "a") as conf_file: conf_file.write("\n".join(options)) def start(self, log_path: Optional[str] = None): @@ -1628,12 +1677,13 @@ class VanillaPostgres(PgProtocol): log_path = os.path.join(self.pgdatadir, "pg.log") self.pg_bin.run_capture( - ['pg_ctl', '-w', '-D', str(self.pgdatadir), '-l', log_path, 'start']) + ["pg_ctl", "-w", "-D", str(self.pgdatadir), "-l", log_path, "start"] + ) def stop(self): assert self.running self.running = False - self.pg_bin.run_capture(['pg_ctl', '-w', '-D', str(self.pgdatadir), 'stop']) + self.pg_bin.run_capture(["pg_ctl", "-w", "-D", str(self.pgdatadir), "stop"]) def get_subdir_size(self, subdir) -> int: """Return size of pgdatadir subdirectory in bytes.""" @@ -1647,9 +1697,10 @@ class VanillaPostgres(PgProtocol): self.stop() -@pytest.fixture(scope='function') -def vanilla_pg(test_output_dir: Path, - port_distributor: PortDistributor) -> Iterator[VanillaPostgres]: +@pytest.fixture(scope="function") +def vanilla_pg( + test_output_dir: Path, port_distributor: PortDistributor +) -> Iterator[VanillaPostgres]: pgdatadir = test_output_dir / "pgdata-vanilla" pg_bin = PgBin(test_output_dir) port = port_distributor.get_port() @@ -1665,18 +1716,18 @@ class RemotePostgres(PgProtocol): self.running = True def configure(self, options: List[str]): - raise Exception('cannot change configuration of remote Posgres instance') + raise Exception("cannot change configuration of remote Posgres instance") def start(self): - raise Exception('cannot start a remote Postgres instance') + raise Exception("cannot start a remote Postgres instance") def stop(self): - raise Exception('cannot stop a remote Postgres instance') + raise Exception("cannot stop a remote Postgres instance") def get_subdir_size(self, subdir) -> int: # TODO: Could use the server's Generic File Access functions if superuser. # See https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-ADMIN-GENFILE - raise Exception('cannot get size of a Postgres instance') + raise Exception("cannot get size of a Postgres instance") def __enter__(self): return self @@ -1686,7 +1737,7 @@ class RemotePostgres(PgProtocol): pass -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def remote_pg(test_output_dir: Path) -> Iterator[RemotePostgres]: pg_bin = PgBin(test_output_dir) @@ -1701,7 +1752,7 @@ def remote_pg(test_output_dir: Path) -> Iterator[RemotePostgres]: class NeonProxy(PgProtocol): def __init__(self, proxy_port: int, http_port: int, auth_endpoint: str): super().__init__(dsn=auth_endpoint, port=proxy_port) - self.host = '127.0.0.1' + self.host = "127.0.0.1" self.http_port = http_port self.proxy_port = proxy_port self.auth_endpoint = auth_endpoint @@ -1712,7 +1763,7 @@ class NeonProxy(PgProtocol): # Start proxy args = [ - os.path.join(str(neon_binpath), 'proxy'), + os.path.join(str(neon_binpath), "proxy"), *["--http", f"{self.host}:{self.http_port}"], *["--proxy", f"{self.host}:{self.proxy_port}"], *["--auth-backend", "postgres"], @@ -1735,7 +1786,7 @@ class NeonProxy(PgProtocol): self._popen.kill() -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def static_proxy(vanilla_pg, port_distributor) -> Iterator[NeonProxy]: """Neon proxy that routes directly to vanilla postgres.""" @@ -1743,28 +1794,28 @@ def static_proxy(vanilla_pg, port_distributor) -> Iterator[NeonProxy]: vanilla_pg.start() vanilla_pg.safe_psql("create user proxy with login superuser password 'password'") - port = vanilla_pg.default_options['port'] - host = vanilla_pg.default_options['host'] - dbname = vanilla_pg.default_options['dbname'] - auth_endpoint = f'postgres://proxy:password@{host}:{port}/{dbname}' + port = vanilla_pg.default_options["port"] + host = vanilla_pg.default_options["host"] + dbname = vanilla_pg.default_options["dbname"] + auth_endpoint = f"postgres://proxy:password@{host}:{port}/{dbname}" proxy_port = port_distributor.get_port() http_port = port_distributor.get_port() - with NeonProxy(proxy_port=proxy_port, http_port=http_port, - auth_endpoint=auth_endpoint) as proxy: + with NeonProxy( + proxy_port=proxy_port, http_port=http_port, auth_endpoint=auth_endpoint + ) as proxy: proxy.start() yield proxy class Postgres(PgProtocol): - """ An object representing a running postgres daemon. """ - def __init__(self, - env: NeonEnv, - tenant_id: uuid.UUID, - port: int, - check_stop_result: bool = True): - super().__init__(host='localhost', port=port, user='cloud_admin', dbname='postgres') + """An object representing a running postgres daemon.""" + + def __init__( + self, env: NeonEnv, tenant_id: uuid.UUID, port: int, check_stop_result: bool = True + ): + super().__init__(host="localhost", port=port, user="cloud_admin", dbname="postgres") self.env = env self.running = False self.node_name: Optional[str] = None # dubious, see asserts below @@ -1780,7 +1831,7 @@ class Postgres(PgProtocol): node_name: Optional[str] = None, lsn: Optional[str] = None, config_lines: Optional[List[str]] = None, - ) -> 'Postgres': + ) -> "Postgres": """ Create the pg data directory. Returns self. @@ -1789,13 +1840,11 @@ class Postgres(PgProtocol): if not config_lines: config_lines = [] - self.node_name = node_name or f'{branch_name}_pg_node' - self.env.neon_cli.pg_create(branch_name, - node_name=self.node_name, - tenant_id=self.tenant_id, - lsn=lsn, - port=self.port) - path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id.hex / self.node_name + self.node_name = node_name or f"{branch_name}_pg_node" + self.env.neon_cli.pg_create( + branch_name, node_name=self.node_name, tenant_id=self.tenant_id, lsn=lsn, port=self.port + ) + path = pathlib.Path("pgdatadirs") / "tenants" / self.tenant_id.hex / self.node_name self.pgdata_dir = os.path.join(self.env.repo_dir, path) if config_lines is None: @@ -1803,12 +1852,12 @@ class Postgres(PgProtocol): # set small 'max_replication_write_lag' to enable backpressure # and make tests more stable. - config_lines = ['max_replication_write_lag=15MB'] + config_lines + config_lines = ["max_replication_write_lag=15MB"] + config_lines self.config(config_lines) return self - def start(self) -> 'Postgres': + def start(self) -> "Postgres": """ Start the Postgres instance. Returns self. @@ -1818,32 +1867,32 @@ class Postgres(PgProtocol): log.info(f"Starting postgres node {self.node_name}") - run_result = self.env.neon_cli.pg_start(self.node_name, - tenant_id=self.tenant_id, - port=self.port) + run_result = self.env.neon_cli.pg_start( + self.node_name, tenant_id=self.tenant_id, port=self.port + ) self.running = True return self def pg_data_dir_path(self) -> str: - """ Path to data directory """ + """Path to data directory""" assert self.node_name - path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id.hex / self.node_name + path = pathlib.Path("pgdatadirs") / "tenants" / self.tenant_id.hex / self.node_name return os.path.join(self.env.repo_dir, path) def pg_xact_dir_path(self) -> str: - """ Path to pg_xact dir """ - return os.path.join(self.pg_data_dir_path(), 'pg_xact') + """Path to pg_xact dir""" + return os.path.join(self.pg_data_dir_path(), "pg_xact") def pg_twophase_dir_path(self) -> str: - """ Path to pg_twophase dir """ - return os.path.join(self.pg_data_dir_path(), 'pg_twophase') + """Path to pg_twophase dir""" + return os.path.join(self.pg_data_dir_path(), "pg_twophase") def config_file_path(self) -> str: - """ Path to postgresql.conf """ - return os.path.join(self.pg_data_dir_path(), 'postgresql.conf') + """Path to postgresql.conf""" + return os.path.join(self.pg_data_dir_path(), "postgresql.conf") - def adjust_for_safekeepers(self, safekeepers: str) -> 'Postgres': + def adjust_for_safekeepers(self, safekeepers: str) -> "Postgres": """ Adjust instance config for working with wal acceptors instead of pageserver (pre-configured by CLI) directly. @@ -1855,30 +1904,33 @@ class Postgres(PgProtocol): with open(self.config_file_path(), "w") as f: for cfg_line in cfg_lines: # walproposer uses different application_name - if ("synchronous_standby_names" in cfg_line or - # don't repeat safekeepers/wal_acceptors multiple times - "neon.safekeepers" in cfg_line): + if ( + "synchronous_standby_names" in cfg_line + or + # don't repeat safekeepers/wal_acceptors multiple times + "neon.safekeepers" in cfg_line + ): continue f.write(cfg_line) f.write("synchronous_standby_names = 'walproposer'\n") f.write("neon.safekeepers = '{}'\n".format(safekeepers)) return self - def config(self, lines: List[str]) -> 'Postgres': + def config(self, lines: List[str]) -> "Postgres": """ Add lines to postgresql.conf. Lines should be an array of valid postgresql.conf rows. Returns self. """ - with open(self.config_file_path(), 'a') as conf: + with open(self.config_file_path(), "a") as conf: for line in lines: conf.write(line) - conf.write('\n') + conf.write("\n") return self - def stop(self) -> 'Postgres': + def stop(self) -> "Postgres": """ Stop the Postgres instance if it's running. Returns self. @@ -1886,24 +1938,23 @@ class Postgres(PgProtocol): if self.running: assert self.node_name is not None - self.env.neon_cli.pg_stop(self.node_name, - self.tenant_id, - check_return_code=self.check_stop_result) + self.env.neon_cli.pg_stop( + self.node_name, self.tenant_id, check_return_code=self.check_stop_result + ) self.running = False return self - def stop_and_destroy(self) -> 'Postgres': + def stop_and_destroy(self) -> "Postgres": """ Stop the Postgres instance, then destroy it. Returns self. """ assert self.node_name is not None - self.env.neon_cli.pg_stop(self.node_name, - self.tenant_id, - True, - check_return_code=self.check_stop_result) + self.env.neon_cli.pg_stop( + self.node_name, self.tenant_id, True, check_return_code=self.check_stop_result + ) self.node_name = None self.running = False @@ -1915,7 +1966,7 @@ class Postgres(PgProtocol): node_name: Optional[str] = None, lsn: Optional[str] = None, config_lines: Optional[List[str]] = None, - ) -> 'Postgres': + ) -> "Postgres": """ Create a Postgres instance, apply config and then start it. @@ -1943,18 +1994,21 @@ class Postgres(PgProtocol): class PostgresFactory: - """ An object representing multiple running postgres daemons. """ + """An object representing multiple running postgres daemons.""" + def __init__(self, env: NeonEnv): self.env = env self.num_instances = 0 self.instances: List[Postgres] = [] - def create_start(self, - branch_name: str, - node_name: Optional[str] = None, - tenant_id: Optional[uuid.UUID] = None, - lsn: Optional[str] = None, - config_lines: Optional[List[str]] = None) -> Postgres: + def create_start( + self, + branch_name: str, + node_name: Optional[str] = None, + tenant_id: Optional[uuid.UUID] = None, + lsn: Optional[str] = None, + config_lines: Optional[List[str]] = None, + ) -> Postgres: pg = Postgres( self.env, @@ -1971,12 +2025,14 @@ class PostgresFactory: lsn=lsn, ) - def create(self, - branch_name: str, - node_name: Optional[str] = None, - tenant_id: Optional[uuid.UUID] = None, - lsn: Optional[str] = None, - config_lines: Optional[List[str]] = None) -> Postgres: + def create( + self, + branch_name: str, + node_name: Optional[str] = None, + tenant_id: Optional[uuid.UUID] = None, + lsn: Optional[str] = None, + config_lines: Optional[List[str]] = None, + ) -> Postgres: pg = Postgres( self.env, @@ -1994,7 +2050,7 @@ class PostgresFactory: config_lines=config_lines, ) - def stop_all(self) -> 'PostgresFactory': + def stop_all(self) -> "PostgresFactory": for pg in self.instances: pg.stop() @@ -2002,7 +2058,7 @@ class PostgresFactory: def read_pid(path: Path) -> int: - """ Read content of file into number """ + """Read content of file into number""" return int(path.read_text()) @@ -2014,13 +2070,14 @@ class SafekeeperPort: @dataclass class Safekeeper: - """ An object representing a running safekeeper daemon. """ + """An object representing a running safekeeper daemon.""" + env: NeonEnv port: SafekeeperPort id: int running: bool = False - def start(self) -> 'Safekeeper': + def start(self) -> "Safekeeper": assert self.running == False self.env.neon_cli.safekeeper_start(self.id) self.running = True @@ -2034,22 +2091,22 @@ class Safekeeper: elapsed = time.time() - started_at if elapsed > 3: raise RuntimeError( - f"timed out waiting {elapsed:.0f}s for wal acceptor start: {e}") + f"timed out waiting {elapsed:.0f}s for wal acceptor start: {e}" + ) time.sleep(0.5) else: break # success return self - def stop(self, immediate=False) -> 'Safekeeper': - log.info('Stopping safekeeper {}'.format(self.id)) + def stop(self, immediate=False) -> "Safekeeper": + log.info("Stopping safekeeper {}".format(self.id)) self.env.neon_cli.safekeeper_stop(self.id, immediate) self.running = False return self - def append_logical_message(self, - tenant_id: uuid.UUID, - timeline_id: uuid.UUID, - request: Dict[str, Any]) -> Dict[str, Any]: + def append_logical_message( + self, tenant_id: uuid.UUID, timeline_id: uuid.UUID, request: Dict[str, Any] + ) -> Dict[str, Any]: """ Send JSON_CTRL query to append LogicalMessage to WAL and modify safekeeper state. It will construct LogicalMessage from provided @@ -2106,7 +2163,7 @@ class SafekeeperHttpClient(requests.Session): self.auth_token = auth_token if auth_token is not None: - self.headers['Authorization'] = f'Bearer {auth_token}' + self.headers["Authorization"] = f"Bearer {auth_token}" def check_status(self): self.get(f"http://localhost:{self.port}/v1/status").raise_for_status() @@ -2115,21 +2172,25 @@ class SafekeeperHttpClient(requests.Session): res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}") res.raise_for_status() resj = res.json() - return SafekeeperTimelineStatus(acceptor_epoch=resj['acceptor_state']['epoch'], - flush_lsn=resj['flush_lsn'], - timeline_start_lsn=resj['timeline_start_lsn'], - backup_lsn=resj['backup_lsn'], - remote_consistent_lsn=resj['remote_consistent_lsn']) + return SafekeeperTimelineStatus( + acceptor_epoch=resj["acceptor_state"]["epoch"], + flush_lsn=resj["flush_lsn"], + timeline_start_lsn=resj["timeline_start_lsn"], + backup_lsn=resj["backup_lsn"], + remote_consistent_lsn=resj["remote_consistent_lsn"], + ) def record_safekeeper_info(self, tenant_id: str, timeline_id: str, body): res = self.post( f"http://localhost:{self.port}/v1/record_safekeeper_info/{tenant_id}/{timeline_id}", - json=body) + json=body, + ) res.raise_for_status() def timeline_delete_force(self, tenant_id: str, timeline_id: str) -> Dict[Any, Any]: res = self.delete( - f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}") + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}" + ) res.raise_for_status() res_json = res.json() assert isinstance(res_json, dict) @@ -2152,21 +2213,24 @@ class SafekeeperHttpClient(requests.Session): metrics = SafekeeperMetrics() for match in re.finditer( - r'^safekeeper_flush_lsn{tenant_id="([0-9a-f]+)",timeline_id="([0-9a-f]+)"} (\S+)$', - all_metrics_text, - re.MULTILINE): + r'^safekeeper_flush_lsn{tenant_id="([0-9a-f]+)",timeline_id="([0-9a-f]+)"} (\S+)$', + all_metrics_text, + re.MULTILINE, + ): metrics.flush_lsn_inexact[(match.group(1), match.group(2))] = int(match.group(3)) for match in re.finditer( - r'^safekeeper_commit_lsn{tenant_id="([0-9a-f]+)",timeline_id="([0-9a-f]+)"} (\S+)$', - all_metrics_text, - re.MULTILINE): + r'^safekeeper_commit_lsn{tenant_id="([0-9a-f]+)",timeline_id="([0-9a-f]+)"} (\S+)$', + all_metrics_text, + re.MULTILINE, + ): metrics.commit_lsn_inexact[(match.group(1), match.group(2))] = int(match.group(3)) return metrics @dataclass class Etcd: - """ An object managing etcd instance """ + """An object managing etcd instance""" + datadir: str port: int peer_port: int @@ -2177,16 +2241,16 @@ class Etcd: self.binary_path = etcd_path() def client_url(self): - return f'http://127.0.0.1:{self.port}' + return f"http://127.0.0.1:{self.port}" def check_status(self): with requests.Session() as s: - s.mount('http://', requests.adapters.HTTPAdapter(max_retries=1)) # do not retry + s.mount("http://", requests.adapters.HTTPAdapter(max_retries=1)) # do not retry s.get(f"{self.client_url()}/health").raise_for_status() def try_start(self): if self.handle is not None: - log.debug(f'etcd is already running on port {self.port}') + log.debug(f"etcd is already running on port {self.port}") return pathlib.Path(self.datadir).mkdir(exist_ok=True) @@ -2206,7 +2270,7 @@ class Etcd: # Set --quota-backend-bytes to keep the etcd virtual memory # size smaller. Our test etcd clusters are very small. # See https://github.com/etcd-io/etcd/issues/7910 - f"--quota-backend-bytes=100000000" + f"--quota-backend-bytes=100000000", ] self.handle = subprocess.Popen(args, stdout=log_file, stderr=log_file) @@ -2230,21 +2294,23 @@ class Etcd: def get_test_output_dir(request: Any) -> pathlib.Path: - """ Compute the working directory for an individual test. """ + """Compute the working directory for an individual test.""" test_name = request.node.name test_dir = pathlib.Path(top_output_dir) / test_name.replace("/", "-") - log.info(f'get_test_output_dir is {test_dir}') + log.info(f"get_test_output_dir is {test_dir}") # make mypy happy assert isinstance(test_dir, pathlib.Path) return test_dir -ATTACHMENT_SUFFIXES = frozenset(( - '.log', - '.stderr', - '.stdout', - '.diffs', -)) +ATTACHMENT_SUFFIXES = frozenset( + ( + ".log", + ".stderr", + ".stdout", + ".diffs", + ) +) # This is autouse, so the test output directory always gets created, even @@ -2256,51 +2322,59 @@ ATTACHMENT_SUFFIXES = frozenset(( # scope. So it uses the get_test_output_dir() function to get the path, and # this fixture ensures that the directory exists. That works because # 'autouse' fixtures are run before other fixtures. -@pytest.fixture(scope='function', autouse=True) +@pytest.fixture(scope="function", autouse=True) def test_output_dir(request: Any) -> Iterator[pathlib.Path]: - """ Create the working directory for an individual test. """ + """Create the working directory for an individual test.""" # one directory per test test_dir = get_test_output_dir(request) - log.info(f'test_output_dir is {test_dir}') + log.info(f"test_output_dir is {test_dir}") shutil.rmtree(test_dir, ignore_errors=True) test_dir.mkdir() yield test_dir - for attachment in test_dir.glob('**/*'): + for attachment in test_dir.glob("**/*"): if attachment.suffix in ATTACHMENT_SUFFIXES: source = str(attachment) name = str(attachment.relative_to(test_dir)) - attachment_type = 'text/plain' - extension = attachment.suffix.removeprefix('.') + attachment_type = "text/plain" + extension = attachment.suffix.removeprefix(".") # compress files larger than 1Mb, they're hardly readable in a browser if attachment.stat().st_size > 1024 * 1024: - source = f'{attachment}.tar.gz' - with tarfile.open(source, 'w:gz') as tar: + source = f"{attachment}.tar.gz" + with tarfile.open(source, "w:gz") as tar: tar.add(attachment, arcname=attachment.name) - name = f'{name}.tar.gz' - attachment_type = 'application/gzip' - extension = 'tar.gz' + name = f"{name}.tar.gz" + attachment_type = "application/gzip" + extension = "tar.gz" allure.attach.file(source, name, attachment_type, extension) -SKIP_DIRS = frozenset(('pg_wal', - 'pg_stat', - 'pg_stat_tmp', - 'pg_subtrans', - 'pg_logical', - 'pg_replslot/wal_proposer_slot')) +SKIP_DIRS = frozenset( + ( + "pg_wal", + "pg_stat", + "pg_stat_tmp", + "pg_subtrans", + "pg_logical", + "pg_replslot/wal_proposer_slot", + ) +) -SKIP_FILES = frozenset(('pg_internal.init', - 'pg.log', - 'zenith.signal', - 'postgresql.conf', - 'postmaster.opts', - 'postmaster.pid', - 'pg_control')) +SKIP_FILES = frozenset( + ( + "pg_internal.init", + "pg.log", + "zenith.signal", + "postgresql.conf", + "postmaster.opts", + "postmaster.pid", + "pg_control", + ) +) def should_skip_dir(dirname: str) -> bool: @@ -2312,10 +2386,10 @@ def should_skip_file(filename: str) -> bool: return True # check for temp table files according to https://www.postgresql.org/docs/current/storage-file-layout.html # i e "tBBB_FFF" - if not filename.startswith('t'): + if not filename.startswith("t"): return False - tmp_name = filename[1:].split('_') + tmp_name = filename[1:].split("_") if len(tmp_name) != 2: return False @@ -2358,7 +2432,7 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post restored_dir_path.mkdir(exist_ok=True) pg_bin = PgBin(test_output_dir) - psql_path = os.path.join(pg_bin.pg_bin_path, 'psql') + psql_path = os.path.join(pg_bin.pg_bin_path, "psql") cmd = rf""" {psql_path} \ @@ -2370,12 +2444,12 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post # Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq. # PgBin sets it automatically, but here we need to pipe psql output to the tar command. - psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')} + psql_env = {"LD_LIBRARY_PATH": os.path.join(str(pg_distrib_dir), "lib")} result = subprocess.run(cmd, env=psql_env, capture_output=True, text=True, shell=True) # Print captured stdout/stderr if basebackup cmd failed. if result.returncode != 0: - log.error('Basebackup shell command failed with:') + log.error("Basebackup shell command failed with:") log.error(result.stdout) log.error(result.stderr) assert result.returncode == 0 @@ -2392,11 +2466,10 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post # filecmp returns (match, mismatch, error) lists # We've already filtered all mismatching files in list_files_to_compare(), # so here expect that the content is identical - (match, mismatch, error) = filecmp.cmpfiles(pg.pgdata_dir, - restored_dir_path, - pgdata_files, - shallow=False) - log.info(f'filecmp result mismatch and error lists:\n\t mismatch={mismatch}\n\t error={error}') + (match, mismatch, error) = filecmp.cmpfiles( + pg.pgdata_dir, restored_dir_path, pgdata_files, shallow=False + ) + log.info(f"filecmp result mismatch and error lists:\n\t mismatch={mismatch}\n\t error={error}") for f in mismatch: @@ -2404,11 +2477,11 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post f2 = os.path.join(restored_dir_path, f) stdout_filename = "{}.filediff".format(f2) - with open(stdout_filename, 'w') as stdout_f: + with open(stdout_filename, "w") as stdout_f: subprocess.run("xxd -b {} > {}.hex ".format(f1, f1), shell=True) subprocess.run("xxd -b {} > {}.hex ".format(f2, f2), shell=True) - cmd = 'diff {}.hex {}.hex'.format(f1, f2) + cmd = "diff {}.hex {}.hex".format(f1, f2) subprocess.run([cmd], stdout=stdout_f, shell=True) assert (mismatch, error) == ([], []) @@ -2432,11 +2505,11 @@ def wait_until(number_of_iterations: int, interval: float, func): raise Exception("timed out while waiting for %s" % func) from last_exception -def assert_timeline_local(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID): +def assert_timeline_local( + pageserver_http_client: NeonPageserverHttpClient, tenant: uuid.UUID, timeline: uuid.UUID +): timeline_detail = pageserver_http_client.timeline_detail(tenant, timeline) - assert timeline_detail.get('local', {}).get("disk_consistent_lsn"), timeline_detail + assert timeline_detail.get("local", {}).get("disk_consistent_lsn"), timeline_detail return timeline_detail @@ -2445,65 +2518,81 @@ def assert_no_in_progress_downloads_for_tenant( tenant: uuid.UUID, ): tenant_status = pageserver_http_client.tenant_status(tenant) - assert tenant_status['has_in_progress_downloads'] is False, tenant_status + assert tenant_status["has_in_progress_downloads"] is False, tenant_status -def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID) -> int: +def remote_consistent_lsn( + pageserver_http_client: NeonPageserverHttpClient, tenant: uuid.UUID, timeline: uuid.UUID +) -> int: detail = pageserver_http_client.timeline_detail(tenant, timeline) - if detail['remote'] is None: + if detail["remote"] is None: # No remote information at all. This happens right after creating # a timeline, before any part of it has been uploaded to remote # storage yet. return 0 else: - lsn_str = detail['remote']['remote_consistent_lsn'] + lsn_str = detail["remote"]["remote_consistent_lsn"] assert isinstance(lsn_str, str) return lsn_from_hex(lsn_str) -def wait_for_upload(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID, - lsn: int): +def wait_for_upload( + pageserver_http_client: NeonPageserverHttpClient, + tenant: uuid.UUID, + timeline: uuid.UUID, + lsn: int, +): """waits for local timeline upload up to specified lsn""" for i in range(20): current_lsn = remote_consistent_lsn(pageserver_http_client, tenant, timeline) if current_lsn >= lsn: return - log.info("waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format( - lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1)) + log.info( + "waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format( + lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1 + ) + ) time.sleep(1) - raise Exception("timed out while waiting for remote_consistent_lsn to reach {}, was {}".format( - lsn_to_hex(lsn), lsn_to_hex(current_lsn))) + raise Exception( + "timed out while waiting for remote_consistent_lsn to reach {}, was {}".format( + lsn_to_hex(lsn), lsn_to_hex(current_lsn) + ) + ) -def last_record_lsn(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID) -> int: +def last_record_lsn( + pageserver_http_client: NeonPageserverHttpClient, tenant: uuid.UUID, timeline: uuid.UUID +) -> int: detail = pageserver_http_client.timeline_detail(tenant, timeline) - lsn_str = detail['local']['last_record_lsn'] + lsn_str = detail["local"]["last_record_lsn"] assert isinstance(lsn_str, str) return lsn_from_hex(lsn_str) -def wait_for_last_record_lsn(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID, - lsn: int): +def wait_for_last_record_lsn( + pageserver_http_client: NeonPageserverHttpClient, + tenant: uuid.UUID, + timeline: uuid.UUID, + lsn: int, +): """waits for pageserver to catch up to a certain lsn""" for i in range(10): current_lsn = last_record_lsn(pageserver_http_client, tenant, timeline) if current_lsn >= lsn: return - log.info("waiting for last_record_lsn to reach {}, now {}, iteration {}".format( - lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1)) + log.info( + "waiting for last_record_lsn to reach {}, now {}, iteration {}".format( + lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1 + ) + ) time.sleep(1) - raise Exception("timed out while waiting for last_record_lsn to reach {}, was {}".format( - lsn_to_hex(lsn), lsn_to_hex(current_lsn))) + raise Exception( + "timed out while waiting for last_record_lsn to reach {}, was {}".format( + lsn_to_hex(lsn), lsn_to_hex(current_lsn) + ) + ) def wait_for_last_flush_lsn(env: NeonEnv, pg: Postgres, tenant: uuid.UUID, timeline: uuid.UUID): diff --git a/test_runner/fixtures/pg_stats.py b/test_runner/fixtures/pg_stats.py index e113d37248..b2e6886eb3 100644 --- a/test_runner/fixtures/pg_stats.py +++ b/test_runner/fixtures/pg_stats.py @@ -18,35 +18,43 @@ class PgStatTable: return f"SELECT {','.join(self.columns)} FROM {self.table} {self.additional_query}" -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def pg_stats_rw() -> List[PgStatTable]: return [ - PgStatTable("pg_stat_database", - ["tup_returned", "tup_fetched", "tup_inserted", "tup_updated", "tup_deleted"], - "WHERE datname='postgres'"), + PgStatTable( + "pg_stat_database", + ["tup_returned", "tup_fetched", "tup_inserted", "tup_updated", "tup_deleted"], + "WHERE datname='postgres'", + ), ] -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def pg_stats_ro() -> List[PgStatTable]: return [ - PgStatTable("pg_stat_database", ["tup_returned", "tup_fetched"], - "WHERE datname='postgres'"), + PgStatTable( + "pg_stat_database", ["tup_returned", "tup_fetched"], "WHERE datname='postgres'" + ), ] -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def pg_stats_wo() -> List[PgStatTable]: return [ - PgStatTable("pg_stat_database", ["tup_inserted", "tup_updated", "tup_deleted"], - "WHERE datname='postgres'"), + PgStatTable( + "pg_stat_database", + ["tup_inserted", "tup_updated", "tup_deleted"], + "WHERE datname='postgres'", + ), ] -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def pg_stats_wal() -> List[PgStatTable]: return [ - PgStatTable("pg_stat_wal", - ["wal_records", "wal_fpi", "wal_bytes", "wal_buffers_full", "wal_write"], - "") + PgStatTable( + "pg_stat_wal", + ["wal_records", "wal_fpi", "wal_bytes", "wal_buffers_full", "wal_write"], + "", + ) ] diff --git a/test_runner/fixtures/slow.py b/test_runner/fixtures/slow.py index c20b766a93..94199ae785 100644 --- a/test_runner/fixtures/slow.py +++ b/test_runner/fixtures/slow.py @@ -1,4 +1,5 @@ import pytest + """ This plugin allows tests to be marked as slow using pytest.mark.slow. By default slow tests are excluded. They need to be specifically requested with the --runslow flag in diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index a37d40014c..48889a8697 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -4,20 +4,19 @@ import pathlib import shutil import subprocess from pathlib import Path - from typing import Any, List, Tuple -from psycopg2.extensions import cursor from fixtures.log_helper import log +from psycopg2.extensions import cursor def get_self_dir() -> str: - """ Get the path to the directory where this script lives. """ + """Get the path to the directory where this script lives.""" return os.path.dirname(os.path.abspath(__file__)) def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str: - """ Run a process and capture its output + """Run a process and capture its output Output will go to files named "cmd_NNN.stdout" and "cmd_NNN.stderr" where "cmd" is the name of the program and NNN is an incrementing @@ -27,14 +26,14 @@ def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str: Returns basepath for files with captured output. """ assert type(cmd) is list - base = os.path.basename(cmd[0]) + '_{}'.format(global_counter()) + base = os.path.basename(cmd[0]) + "_{}".format(global_counter()) basepath = os.path.join(capture_dir, base) - stdout_filename = basepath + '.stdout' - stderr_filename = basepath + '.stderr' + stdout_filename = basepath + ".stdout" + stderr_filename = basepath + ".stderr" try: - with open(stdout_filename, 'w') as stdout_f: - with open(stderr_filename, 'w') as stderr_f: + with open(stdout_filename, "w") as stdout_f: + with open(stderr_filename, "w") as stderr_f: log.info(f'Capturing stdout to "{base}.stdout" and stderr to "{base}.stderr"') subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f) finally: @@ -50,7 +49,7 @@ _global_counter = 0 def global_counter() -> int: - """ A really dumb global counter. + """A really dumb global counter. This is useful for giving output files a unique number, so if we run the same command multiple times we can keep their output separate. @@ -61,13 +60,13 @@ def global_counter() -> int: def lsn_to_hex(num: int) -> str: - """ Convert lsn from int to standard hex notation. """ - return "{:X}/{:X}".format(num >> 32, num & 0xffffffff) + """Convert lsn from int to standard hex notation.""" + return "{:X}/{:X}".format(num >> 32, num & 0xFFFFFFFF) def lsn_from_hex(lsn_hex: str) -> int: - """ Convert lsn from hex notation to int. """ - l, r = lsn_hex.split('/') + """Convert lsn from hex notation to int.""" + l, r = lsn_hex.split("/") return (int(l, 16) << 32) + int(r, 16) @@ -75,14 +74,16 @@ def print_gc_result(row): log.info("GC duration {elapsed} ms".format_map(row)) log.info( " total: {layers_total}, needed_by_cutoff {layers_needed_by_cutoff}, needed_by_pitr {layers_needed_by_pitr}" - " needed_by_branches: {layers_needed_by_branches}, not_updated: {layers_not_updated}, removed: {layers_removed}" - .format_map(row)) + " needed_by_branches: {layers_needed_by_branches}, not_updated: {layers_not_updated}, removed: {layers_removed}".format_map( + row + ) + ) def etcd_path() -> Path: path_output = shutil.which("etcd") if path_output is None: - raise RuntimeError('etcd not found in PATH') + raise RuntimeError("etcd not found in PATH") else: return Path(path_output) @@ -145,7 +146,12 @@ def parse_delta_layer(f_name: str) -> Tuple[int, int, int, int]: parts = f_name.split("__") key_parts = parts[0].split("-") lsn_parts = parts[1].split("-") - return int(key_parts[0], 16), int(key_parts[1], 16), int(lsn_parts[0], 16), int(lsn_parts[1], 16) + return ( + int(key_parts[0], 16), + int(key_parts[1], 16), + int(lsn_parts[0], 16), + int(lsn_parts[1], 16), + ) def get_scale_for_db(size_mb: int) -> int: diff --git a/test_runner/performance/test_branch_creation.py b/test_runner/performance/test_branch_creation.py index 1d39b0830d..9cb346de47 100644 --- a/test_runner/performance/test_branch_creation.py +++ b/test_runner/performance/test_branch_creation.py @@ -1,28 +1,26 @@ import random -import time import statistics import threading +import time import timeit -import pytest from typing import List + +import pytest from fixtures.benchmark_fixture import MetricReport from fixtures.compare_fixtures import NeonCompare from fixtures.log_helper import log def _record_branch_creation_durations(neon_compare: NeonCompare, durs: List[float]): - neon_compare.zenbenchmark.record("branch_creation_duration_max", - max(durs), - 's', - MetricReport.LOWER_IS_BETTER) - neon_compare.zenbenchmark.record("branch_creation_duration_avg", - statistics.mean(durs), - 's', - MetricReport.LOWER_IS_BETTER) - neon_compare.zenbenchmark.record("branch_creation_duration_stdev", - statistics.stdev(durs), - 's', - MetricReport.LOWER_IS_BETTER) + neon_compare.zenbenchmark.record( + "branch_creation_duration_max", max(durs), "s", MetricReport.LOWER_IS_BETTER + ) + neon_compare.zenbenchmark.record( + "branch_creation_duration_avg", statistics.mean(durs), "s", MetricReport.LOWER_IS_BETTER + ) + neon_compare.zenbenchmark.record( + "branch_creation_duration_stdev", statistics.stdev(durs), "s", MetricReport.LOWER_IS_BETTER + ) @pytest.mark.parametrize("n_branches", [20]) @@ -37,15 +35,16 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int) # Use aggressive GC and checkpoint settings, so GC and compaction happen more often during the test tenant, _ = env.neon_cli.create_tenant( - conf={ - 'gc_period': '5 s', - 'gc_horizon': f'{4 * 1024 ** 2}', - 'checkpoint_distance': f'{2 * 1024 ** 2}', - 'compaction_target_size': f'{1024 ** 2}', - 'compaction_threshold': '2', - # set PITR interval to be small, so we can do GC - 'pitr_interval': '5 s' - }) + conf={ + "gc_period": "5 s", + "gc_horizon": f"{4 * 1024 ** 2}", + "checkpoint_distance": f"{2 * 1024 ** 2}", + "compaction_target_size": f"{1024 ** 2}", + "compaction_threshold": "2", + # set PITR interval to be small, so we can do GC + "pitr_interval": "5 s", + } + ) def run_pgbench(branch: str): log.info(f"Start a pgbench workload on branch {branch}") @@ -53,15 +52,15 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int) pg = env.postgres.create_start(branch, tenant_id=tenant) connstr = pg.connstr() - pg_bin.run_capture(['pgbench', '-i', connstr]) - pg_bin.run_capture(['pgbench', '-c10', '-T10', connstr]) + pg_bin.run_capture(["pgbench", "-i", connstr]) + pg_bin.run_capture(["pgbench", "-c10", "-T10", connstr]) pg.stop() - env.neon_cli.create_branch('b0', tenant_id=tenant) + env.neon_cli.create_branch("b0", tenant_id=tenant) threads: List[threading.Thread] = [] - threads.append(threading.Thread(target=run_pgbench, args=('b0', ), daemon=True)) + threads.append(threading.Thread(target=run_pgbench, args=("b0",), daemon=True)) threads[-1].start() branch_creation_durations = [] @@ -72,13 +71,13 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int) p = random.randint(0, i) timer = timeit.default_timer() - env.neon_cli.create_branch('b{}'.format(i + 1), 'b{}'.format(p), tenant_id=tenant) + env.neon_cli.create_branch("b{}".format(i + 1), "b{}".format(p), tenant_id=tenant) dur = timeit.default_timer() - timer log.info(f"Creating branch b{i+1} took {dur}s") branch_creation_durations.append(dur) - threads.append(threading.Thread(target=run_pgbench, args=(f'b{i+1}', ), daemon=True)) + threads.append(threading.Thread(target=run_pgbench, args=(f"b{i+1}",), daemon=True)) threads[-1].start() for thread in threads: @@ -92,10 +91,10 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int) def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int): env = neon_compare.env - env.neon_cli.create_branch('b0') + env.neon_cli.create_branch("b0") - pg = env.postgres.create_start('b0') - neon_compare.pg_bin.run_capture(['pgbench', '-i', '-s10', pg.connstr()]) + pg = env.postgres.create_start("b0") + neon_compare.pg_bin.run_capture(["pgbench", "-i", "-s10", pg.connstr()]) branch_creation_durations = [] @@ -103,7 +102,7 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int): # random a source branch p = random.randint(0, i) timer = timeit.default_timer() - env.neon_cli.create_branch('b{}'.format(i + 1), 'b{}'.format(p)) + env.neon_cli.create_branch("b{}".format(i + 1), "b{}".format(p)) dur = timeit.default_timer() - timer branch_creation_durations.append(dur) diff --git a/test_runner/performance/test_bulk_insert.py b/test_runner/performance/test_bulk_insert.py index 6a5bad8757..9aaf0cbc77 100644 --- a/test_runner/performance/test_bulk_insert.py +++ b/test_runner/performance/test_bulk_insert.py @@ -1,8 +1,9 @@ from contextlib import closing -from fixtures.neon_fixtures import NeonEnv -from fixtures.log_helper import log + from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker -from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare +from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv # @@ -23,8 +24,8 @@ def test_bulk_insert(neon_with_baseline: PgCompare): cur.execute("create table huge (i int, j int);") # Run INSERT, recording the time and I/O it takes - with env.record_pageserver_writes('pageserver_writes'): - with env.record_duration('insert'): + with env.record_pageserver_writes("pageserver_writes"): + with env.record_duration("insert"): cur.execute("insert into huge values (generate_series(1, 5000000), 0);") env.flush() diff --git a/test_runner/performance/test_bulk_tenant_create.py b/test_runner/performance/test_bulk_tenant_create.py index fe3c3afe37..cef7ce0c6b 100644 --- a/test_runner/performance/test_bulk_tenant_create.py +++ b/test_runner/performance/test_bulk_tenant_create.py @@ -1,7 +1,7 @@ import timeit -from fixtures.benchmark_fixture import MetricReport -import pytest +import pytest +from fixtures.benchmark_fixture import MetricReport from fixtures.neon_fixtures import NeonEnvBuilder # Run bulk tenant creation test. @@ -12,7 +12,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder # 2. Average creation time per tenant -@pytest.mark.parametrize('tenants_count', [1, 5, 10]) +@pytest.mark.parametrize("tenants_count", [1, 5, 10]) def test_bulk_tenant_create( neon_env_builder: NeonEnvBuilder, tenants_count: int, @@ -27,22 +27,26 @@ def test_bulk_tenant_create( start = timeit.default_timer() tenant, _ = env.neon_cli.create_tenant() - env.neon_cli.create_timeline(f'test_bulk_tenant_create_{tenants_count}_{i}', - tenant_id=tenant) + env.neon_cli.create_timeline( + f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant + ) # FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now? - #if use_safekeepers == 'with_sa': + # if use_safekeepers == 'with_sa': # wa_factory.start_n_new(3) - pg_tenant = env.postgres.create_start(f'test_bulk_tenant_create_{tenants_count}_{i}', - tenant_id=tenant) + pg_tenant = env.postgres.create_start( + f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant + ) end = timeit.default_timer() time_slices.append(end - start) pg_tenant.stop() - zenbenchmark.record('tenant_creation_time', - sum(time_slices) / len(time_slices), - 's', - report=MetricReport.LOWER_IS_BETTER) + zenbenchmark.record( + "tenant_creation_time", + sum(time_slices) / len(time_slices), + "s", + report=MetricReport.LOWER_IS_BETTER, + ) diff --git a/test_runner/performance/test_compare_pg_stats.py b/test_runner/performance/test_compare_pg_stats.py index b9bca90231..d39ea55fbb 100644 --- a/test_runner/performance/test_compare_pg_stats.py +++ b/test_runner/performance/test_compare_pg_stats.py @@ -6,7 +6,6 @@ from typing import List import pytest from fixtures.compare_fixtures import PgCompare from fixtures.pg_stats import PgStatTable - from performance.test_perf_pgbench import get_durations_matrix, get_scales_matrix @@ -18,85 +17,96 @@ def get_seeds_matrix(default: int = 100): @pytest.mark.parametrize("seed", get_seeds_matrix()) @pytest.mark.parametrize("scale", get_scales_matrix()) @pytest.mark.parametrize("duration", get_durations_matrix(5)) -def test_compare_pg_stats_rw_with_pgbench_default(neon_with_baseline: PgCompare, - seed: int, - scale: int, - duration: int, - pg_stats_rw: List[PgStatTable]): +def test_compare_pg_stats_rw_with_pgbench_default( + neon_with_baseline: PgCompare, + seed: int, + scale: int, + duration: int, + pg_stats_rw: List[PgStatTable], +): env = neon_with_baseline # initialize pgbench - env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()]) + env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()]) env.flush() with env.record_pg_stats(pg_stats_rw): env.pg_bin.run_capture( - ['pgbench', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()]) + ["pgbench", f"-T{duration}", f"--random-seed={seed}", env.pg.connstr()] + ) env.flush() @pytest.mark.parametrize("seed", get_seeds_matrix()) @pytest.mark.parametrize("scale", get_scales_matrix()) @pytest.mark.parametrize("duration", get_durations_matrix(5)) -def test_compare_pg_stats_wo_with_pgbench_simple_update(neon_with_baseline: PgCompare, - seed: int, - scale: int, - duration: int, - pg_stats_wo: List[PgStatTable]): +def test_compare_pg_stats_wo_with_pgbench_simple_update( + neon_with_baseline: PgCompare, + seed: int, + scale: int, + duration: int, + pg_stats_wo: List[PgStatTable], +): env = neon_with_baseline # initialize pgbench - env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()]) + env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()]) env.flush() with env.record_pg_stats(pg_stats_wo): env.pg_bin.run_capture( - ['pgbench', '-N', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()]) + ["pgbench", "-N", f"-T{duration}", f"--random-seed={seed}", env.pg.connstr()] + ) env.flush() @pytest.mark.parametrize("seed", get_seeds_matrix()) @pytest.mark.parametrize("scale", get_scales_matrix()) @pytest.mark.parametrize("duration", get_durations_matrix(5)) -def test_compare_pg_stats_ro_with_pgbench_select_only(neon_with_baseline: PgCompare, - seed: int, - scale: int, - duration: int, - pg_stats_ro: List[PgStatTable]): +def test_compare_pg_stats_ro_with_pgbench_select_only( + neon_with_baseline: PgCompare, + seed: int, + scale: int, + duration: int, + pg_stats_ro: List[PgStatTable], +): env = neon_with_baseline # initialize pgbench - env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()]) + env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()]) env.flush() with env.record_pg_stats(pg_stats_ro): env.pg_bin.run_capture( - ['pgbench', '-S', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()]) + ["pgbench", "-S", f"-T{duration}", f"--random-seed={seed}", env.pg.connstr()] + ) env.flush() @pytest.mark.parametrize("seed", get_seeds_matrix()) @pytest.mark.parametrize("scale", get_scales_matrix()) @pytest.mark.parametrize("duration", get_durations_matrix(5)) -def test_compare_pg_stats_wal_with_pgbench_default(neon_with_baseline: PgCompare, - seed: int, - scale: int, - duration: int, - pg_stats_wal: List[PgStatTable]): +def test_compare_pg_stats_wal_with_pgbench_default( + neon_with_baseline: PgCompare, + seed: int, + scale: int, + duration: int, + pg_stats_wal: List[PgStatTable], +): env = neon_with_baseline # initialize pgbench - env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()]) + env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()]) env.flush() with env.record_pg_stats(pg_stats_wal): env.pg_bin.run_capture( - ['pgbench', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()]) + ["pgbench", f"-T{duration}", f"--random-seed={seed}", env.pg.connstr()] + ) env.flush() @pytest.mark.parametrize("n_tables", [1, 10]) @pytest.mark.parametrize("duration", get_durations_matrix(10)) -def test_compare_pg_stats_wo_with_heavy_write(neon_with_baseline: PgCompare, - n_tables: int, - duration: int, - pg_stats_wo: List[PgStatTable]): +def test_compare_pg_stats_wo_with_heavy_write( + neon_with_baseline: PgCompare, n_tables: int, duration: int, pg_stats_wo: List[PgStatTable] +): env = neon_with_baseline with env.pg.connect().cursor() as cur: for i in range(n_tables): @@ -112,8 +122,7 @@ def test_compare_pg_stats_wo_with_heavy_write(neon_with_baseline: PgCompare, with env.record_pg_stats(pg_stats_wo): threads = [ - threading.Thread(target=start_single_table_workload, args=(i, )) - for i in range(n_tables) + threading.Thread(target=start_single_table_workload, args=(i,)) for i in range(n_tables) ] for thread in threads: diff --git a/test_runner/performance/test_copy.py b/test_runner/performance/test_copy.py index ad088684d5..bf4804fc07 100644 --- a/test_runner/performance/test_copy.py +++ b/test_runner/performance/test_copy.py @@ -1,11 +1,12 @@ from contextlib import closing -from fixtures.neon_fixtures import NeonEnv -from fixtures.log_helper import log -from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker -from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare from io import BufferedReader, RawIOBase from itertools import repeat +from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv + class CopyTestData(RawIOBase): def __init__(self, rows: int): @@ -29,7 +30,7 @@ class CopyTestData(RawIOBase): # Number of bytes to read in this call l = min(len(self.linebuf) - self.ptr, len(b)) - b[:l] = self.linebuf[self.ptr:(self.ptr + l)] + b[:l] = self.linebuf[self.ptr : (self.ptr + l)] self.ptr += l return l @@ -52,19 +53,19 @@ def test_copy(neon_with_baseline: PgCompare): # Load data with COPY, recording the time and I/O it takes. # # Since there's no data in the table previously, this extends it. - with env.record_pageserver_writes('copy_extend_pageserver_writes'): - with env.record_duration('copy_extend'): - cur.copy_from(copy_test_data(1000000), 'copytest') + with env.record_pageserver_writes("copy_extend_pageserver_writes"): + with env.record_duration("copy_extend"): + cur.copy_from(copy_test_data(1000000), "copytest") env.flush() # Delete most rows, and VACUUM to make the space available for reuse. - with env.record_pageserver_writes('delete_pageserver_writes'): - with env.record_duration('delete'): + with env.record_pageserver_writes("delete_pageserver_writes"): + with env.record_duration("delete"): cur.execute("delete from copytest where i % 100 <> 0;") env.flush() - with env.record_pageserver_writes('vacuum_pageserver_writes'): - with env.record_duration('vacuum'): + with env.record_pageserver_writes("vacuum_pageserver_writes"): + with env.record_duration("vacuum"): cur.execute("vacuum copytest") env.flush() @@ -72,9 +73,9 @@ def test_copy(neon_with_baseline: PgCompare): # by the VACUUM. # # This will also clear all the VM bits. - with env.record_pageserver_writes('copy_reuse_pageserver_writes'): - with env.record_duration('copy_reuse'): - cur.copy_from(copy_test_data(1000000), 'copytest') + with env.record_pageserver_writes("copy_reuse_pageserver_writes"): + with env.record_duration("copy_reuse"): + cur.copy_from(copy_test_data(1000000), "copytest") env.flush() env.report_peak_memory_use() diff --git a/test_runner/performance/test_dup_key.py b/test_runner/performance/test_dup_key.py index ee867a9845..60fe3014ba 100644 --- a/test_runner/performance/test_dup_key.py +++ b/test_runner/performance/test_dup_key.py @@ -1,5 +1,6 @@ -import pytest from contextlib import closing + +import pytest from fixtures.compare_fixtures import PgCompare from pytest_lazyfixture import lazy_fixture # type: ignore @@ -11,22 +12,24 @@ from pytest_lazyfixture import lazy_fixture # type: ignore pytest.param(lazy_fixture("neon_compare"), id="neon", marks=pytest.mark.slow), pytest.param(lazy_fixture("vanilla_compare"), id="vanilla", marks=pytest.mark.slow), pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster), - ]) + ], +) def test_dup_key(env: PgCompare): # Update the same page many times, then measure read performance with closing(env.pg.connect()) as conn: with conn.cursor() as cur: - cur.execute('drop table if exists t, f;') + cur.execute("drop table if exists t, f;") cur.execute("SET synchronous_commit=off") cur.execute("SET statement_timeout=0") # Write many updates to the same row - with env.record_duration('write'): + with env.record_duration("write"): cur.execute("create table t (i integer, filler text);") - cur.execute('insert into t values (0);') - cur.execute(""" + cur.execute("insert into t values (0);") + cur.execute( + """ do $$ begin for ivar in 1..5000000 loop @@ -38,13 +41,14 @@ begin end loop; end; $$; -""") +""" + ) # Write 3-4 MB to evict t from compute cache - cur.execute('create table f (i integer);') - cur.execute(f'insert into f values (generate_series(1,100000));') + cur.execute("create table f (i integer);") + cur.execute(f"insert into f values (generate_series(1,100000));") # Read - with env.record_duration('read'): - cur.execute('select * from t;') + with env.record_duration("read"): + cur.execute("select * from t;") cur.fetchall() diff --git a/test_runner/performance/test_gist_build.py b/test_runner/performance/test_gist_build.py index 839eb3f57d..d8fa97fbbf 100644 --- a/test_runner/performance/test_gist_build.py +++ b/test_runner/performance/test_gist_build.py @@ -1,9 +1,10 @@ import os from contextlib import closing + from fixtures.benchmark_fixture import MetricReport -from fixtures.neon_fixtures import NeonEnv -from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare +from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv # @@ -24,8 +25,8 @@ def test_gist_buffering_build(neon_with_baseline: PgCompare): ) # Build the index. - with env.record_pageserver_writes('pageserver_writes'): - with env.record_duration('build'): + with env.record_pageserver_writes("pageserver_writes"): + with env.record_duration("build"): cur.execute( "create index gist_pointidx2 on gist_point_tbl using gist(p) with (buffering = on)" ) diff --git a/test_runner/performance/test_hot_page.py b/test_runner/performance/test_hot_page.py index d3da0310ce..8e8ab9849a 100644 --- a/test_runner/performance/test_hot_page.py +++ b/test_runner/performance/test_hot_page.py @@ -1,5 +1,6 @@ -import pytest from contextlib import closing + +import pytest from fixtures.compare_fixtures import PgCompare from pytest_lazyfixture import lazy_fixture # type: ignore @@ -11,27 +12,28 @@ from pytest_lazyfixture import lazy_fixture # type: ignore pytest.param(lazy_fixture("neon_compare"), id="neon", marks=pytest.mark.slow), pytest.param(lazy_fixture("vanilla_compare"), id="vanilla", marks=pytest.mark.slow), pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster), - ]) + ], +) def test_hot_page(env: PgCompare): # Update the same page many times, then measure read performance num_writes = 1000000 with closing(env.pg.connect()) as conn: with conn.cursor() as cur: - cur.execute('drop table if exists t, f;') + cur.execute("drop table if exists t, f;") # Write many updates to the same row - with env.record_duration('write'): - cur.execute('create table t (i integer);') - cur.execute('insert into t values (0);') + with env.record_duration("write"): + cur.execute("create table t (i integer);") + cur.execute("insert into t values (0);") for i in range(num_writes): - cur.execute(f'update t set i = {i};') + cur.execute(f"update t set i = {i};") # Write 3-4 MB to evict t from compute cache - cur.execute('create table f (i integer);') - cur.execute(f'insert into f values (generate_series(1,100000));') + cur.execute("create table f (i integer);") + cur.execute(f"insert into f values (generate_series(1,100000));") # Read - with env.record_duration('read'): - cur.execute('select * from t;') + with env.record_duration("read"): + cur.execute("select * from t;") cur.fetchall() diff --git a/test_runner/performance/test_hot_table.py b/test_runner/performance/test_hot_table.py index 997c772f88..2f519e152c 100644 --- a/test_runner/performance/test_hot_table.py +++ b/test_runner/performance/test_hot_table.py @@ -1,5 +1,6 @@ -import pytest from contextlib import closing + +import pytest from fixtures.compare_fixtures import PgCompare from pytest_lazyfixture import lazy_fixture # type: ignore @@ -11,7 +12,8 @@ from pytest_lazyfixture import lazy_fixture # type: ignore pytest.param(lazy_fixture("neon_compare"), id="neon", marks=pytest.mark.slow), pytest.param(lazy_fixture("vanilla_compare"), id="vanilla", marks=pytest.mark.slow), pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster), - ]) + ], +) def test_hot_table(env: PgCompare): # Update a small table many times, then measure read performance num_rows = 100000 # Slightly larger than shared buffers size TODO validate @@ -20,17 +22,17 @@ def test_hot_table(env: PgCompare): with closing(env.pg.connect()) as conn: with conn.cursor() as cur: - cur.execute('drop table if exists t;') + cur.execute("drop table if exists t;") # Write many updates to a small table - with env.record_duration('write'): - cur.execute('create table t (i integer primary key);') - cur.execute(f'insert into t values (generate_series(1,{num_rows}));') + with env.record_duration("write"): + cur.execute("create table t (i integer primary key);") + cur.execute(f"insert into t values (generate_series(1,{num_rows}));") for i in range(num_writes): - cur.execute(f'update t set i = {i + num_rows} WHERE i = {i};') + cur.execute(f"update t set i = {i + num_rows} WHERE i = {i};") # Read the table - with env.record_duration('read'): + with env.record_duration("read"): for i in range(num_reads): - cur.execute('select * from t;') + cur.execute("select * from t;") cur.fetchall() diff --git a/test_runner/performance/test_parallel_copy_to.py b/test_runner/performance/test_parallel_copy_to.py index d4e74ce195..c1883dec7b 100644 --- a/test_runner/performance/test_parallel_copy_to.py +++ b/test_runner/performance/test_parallel_copy_to.py @@ -1,10 +1,11 @@ -from io import BytesIO import asyncio +from io import BytesIO + import asyncpg -from fixtures.neon_fixtures import NeonEnv, Postgres, PgProtocol -from fixtures.log_helper import log from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker -from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare +from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, PgProtocol, Postgres async def repeat_bytes(buf, repetitions: int): @@ -16,7 +17,8 @@ async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: st buf = BytesIO() for i in range(1000): buf.write( - f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()) + f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode() + ) buf.seek(0) copy_input = repeat_bytes(buf.read(), 5000) @@ -28,7 +30,7 @@ async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: st async def parallel_load_different_tables(pg: PgProtocol, n_parallel: int): workers = [] for worker_id in range(n_parallel): - worker = copy_test_data_to_table(pg, worker_id, f'copytest_{worker_id}') + worker = copy_test_data_to_table(pg, worker_id, f"copytest_{worker_id}") workers.append(asyncio.create_task(worker)) # await all workers @@ -43,10 +45,10 @@ def test_parallel_copy_different_tables(neon_with_baseline: PgCompare, n_paralle cur = conn.cursor() for worker_id in range(n_parallel): - cur.execute(f'CREATE TABLE copytest_{worker_id} (i int, t text)') + cur.execute(f"CREATE TABLE copytest_{worker_id} (i int, t text)") - with env.record_pageserver_writes('pageserver_writes'): - with env.record_duration('load'): + with env.record_pageserver_writes("pageserver_writes"): + with env.record_duration("load"): asyncio.run(parallel_load_different_tables(env.pg, n_parallel)) env.flush() @@ -57,7 +59,7 @@ def test_parallel_copy_different_tables(neon_with_baseline: PgCompare, n_paralle async def parallel_load_same_table(pg: PgProtocol, n_parallel: int): workers = [] for worker_id in range(n_parallel): - worker = copy_test_data_to_table(pg, worker_id, f'copytest') + worker = copy_test_data_to_table(pg, worker_id, f"copytest") workers.append(asyncio.create_task(worker)) # await all workers @@ -70,10 +72,10 @@ def test_parallel_copy_same_table(neon_with_baseline: PgCompare, n_parallel=5): conn = env.pg.connect() cur = conn.cursor() - cur.execute(f'CREATE TABLE copytest (i int, t text)') + cur.execute(f"CREATE TABLE copytest (i int, t text)") - with env.record_pageserver_writes('pageserver_writes'): - with env.record_duration('load'): + with env.record_pageserver_writes("pageserver_writes"): + with env.record_duration("load"): asyncio.run(parallel_load_same_table(env.pg, n_parallel)) env.flush() diff --git a/test_runner/performance/test_perf_pgbench.py b/test_runner/performance/test_perf_pgbench.py index 89c510e76e..934642d095 100644 --- a/test_runner/performance/test_perf_pgbench.py +++ b/test_runner/performance/test_perf_pgbench.py @@ -30,7 +30,7 @@ def init_pgbench(env: PgCompare, cmdline): # duration is actually a metric and uses float instead of int for timestamp start_timestamp = utc_now_timestamp() t0 = timeit.default_timer() - with env.record_pageserver_writes('init.pageserver_writes'): + with env.record_pageserver_writes("init.pageserver_writes"): out = env.pg_bin.run_capture(cmdline) env.flush() @@ -49,10 +49,12 @@ def init_pgbench(env: PgCompare, cmdline): def run_pgbench(env: PgCompare, prefix: str, cmdline): - with env.record_pageserver_writes(f'{prefix}.pageserver_writes'): + with env.record_pageserver_writes(f"{prefix}.pageserver_writes"): run_start_timestamp = utc_now_timestamp() t0 = timeit.default_timer() - out = env.pg_bin.run_capture(cmdline, ) + out = env.pg_bin.run_capture( + cmdline, + ) run_duration = timeit.default_timer() - t0 run_end_timestamp = utc_now_timestamp() env.flush() @@ -78,40 +80,45 @@ def run_pgbench(env: PgCompare, prefix: str, cmdline): # # Currently, the # of connections is hardcoded at 4 def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: PgBenchLoadType): - env.zenbenchmark.record("scale", scale, '', MetricReport.TEST_PARAM) + env.zenbenchmark.record("scale", scale, "", MetricReport.TEST_PARAM) if workload_type == PgBenchLoadType.INIT: # Run initialize init_pgbench( - env, ['pgbench', f'-s{scale}', '-i', env.pg.connstr(options='-cstatement_timeout=1h')]) + env, ["pgbench", f"-s{scale}", "-i", env.pg.connstr(options="-cstatement_timeout=1h")] + ) if workload_type == PgBenchLoadType.SIMPLE_UPDATE: # Run simple-update workload - run_pgbench(env, - "simple-update", - [ - 'pgbench', - '-N', - '-c4', - f'-T{duration}', - '-P2', - '--progress-timestamp', - env.pg.connstr(), - ]) + run_pgbench( + env, + "simple-update", + [ + "pgbench", + "-N", + "-c4", + f"-T{duration}", + "-P2", + "--progress-timestamp", + env.pg.connstr(), + ], + ) if workload_type == PgBenchLoadType.SELECT_ONLY: # Run SELECT workload - run_pgbench(env, - "select-only", - [ - 'pgbench', - '-S', - '-c4', - f'-T{duration}', - '-P2', - '--progress-timestamp', - env.pg.connstr(), - ]) + run_pgbench( + env, + "select-only", + [ + "pgbench", + "-S", + "-c4", + f"-T{duration}", + "-P2", + "--progress-timestamp", + env.pg.connstr(), + ], + ) env.report_size() @@ -121,12 +128,12 @@ def get_durations_matrix(default: int = 45) -> List[int]: rv = [] for d in durations.split(","): d = d.strip().lower() - if d.endswith('h'): - duration = int(d.removesuffix('h')) * 60 * 60 - elif d.endswith('m'): - duration = int(d.removesuffix('m')) * 60 + if d.endswith("h"): + duration = int(d.removesuffix("h")) * 60 * 60 + elif d.endswith("m"): + duration = int(d.removesuffix("m")) * 60 else: - duration = int(d.removesuffix('s')) + duration = int(d.removesuffix("s")) rv.append(duration) return rv @@ -137,10 +144,10 @@ def get_scales_matrix(default: int = 10) -> List[int]: rv = [] for s in scales.split(","): s = s.strip().lower() - if s.endswith('mb'): - scale = get_scale_for_db(int(s.removesuffix('mb'))) - elif s.endswith('gb'): - scale = get_scale_for_db(int(s.removesuffix('gb')) * 1024) + if s.endswith("mb"): + scale = get_scale_for_db(int(s.removesuffix("mb"))) + elif s.endswith("gb"): + scale = get_scale_for_db(int(s.removesuffix("gb")) * 1024) else: scale = int(s) rv.append(scale) @@ -167,9 +174,9 @@ def test_pgbench(neon_with_baseline: PgCompare, scale: int, duration: int): @pytest.mark.parametrize("duration", get_durations_matrix()) def test_pgbench_flamegraph(zenbenchmark, pg_bin, neon_env_builder, scale: int, duration: int): neon_env_builder.num_safekeepers = 1 - neon_env_builder.pageserver_config_override = ''' + neon_env_builder.pageserver_config_override = """ profiling="page_requests" -''' +""" if not profiling_supported(): pytest.skip("pageserver was built without 'profiling' feature") diff --git a/test_runner/performance/test_random_writes.py b/test_runner/performance/test_random_writes.py index 8931234c51..8ed684af16 100644 --- a/test_runner/performance/test_random_writes.py +++ b/test_runner/performance/test_random_writes.py @@ -1,14 +1,13 @@ import os -from contextlib import closing -from fixtures.benchmark_fixture import MetricReport -from fixtures.neon_fixtures import NeonEnv -from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare -from fixtures.log_helper import log - -import psycopg2.extras import random import time +from contextlib import closing +import psycopg2.extras +from fixtures.benchmark_fixture import MetricReport +from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv from fixtures.utils import query_scalar @@ -43,13 +42,15 @@ def test_random_writes(neon_with_baseline: PgCompare): with closing(env.pg.connect()) as conn: with conn.cursor() as cur: # Create the test table - with env.record_duration('init'): - cur.execute(""" + with env.record_duration("init"): + cur.execute( + """ CREATE TABLE Big( pk integer primary key, count integer default 0 ); - """) + """ + ) # Insert n_rows in batches to avoid query timeouts rows_inserted = 0 @@ -62,7 +63,7 @@ def test_random_writes(neon_with_baseline: PgCompare): # Get table size (can't be predicted because padding and alignment) table_size = query_scalar(cur, "SELECT pg_relation_size('Big')") - env.zenbenchmark.record("table_size", table_size, 'bytes', MetricReport.TEST_PARAM) + env.zenbenchmark.record("table_size", table_size, "bytes", MetricReport.TEST_PARAM) # Decide how much to write, based on knowledge of pageserver implementation. # Avoiding segment collisions maximizes (neon_runtime / vanilla_runtime). @@ -72,13 +73,15 @@ def test_random_writes(neon_with_baseline: PgCompare): # The closer this is to 250 MB, the more realistic the test is. effective_checkpoint_distance = table_size * n_writes // n_rows - env.zenbenchmark.record("effective_checkpoint_distance", - effective_checkpoint_distance, - 'bytes', - MetricReport.TEST_PARAM) + env.zenbenchmark.record( + "effective_checkpoint_distance", + effective_checkpoint_distance, + "bytes", + MetricReport.TEST_PARAM, + ) # Update random keys - with env.record_duration('run'): + with env.record_duration("run"): for it in range(n_iterations): for i in range(n_writes): key = random.randint(1, n_rows) diff --git a/test_runner/performance/test_seqscans.py b/test_runner/performance/test_seqscans.py index 8d7ad46c1a..6094ed38e5 100644 --- a/test_runner/performance/test_seqscans.py +++ b/test_runner/performance/test_seqscans.py @@ -2,15 +2,16 @@ # from contextlib import closing from dataclasses import dataclass -from fixtures.neon_fixtures import NeonEnv -from fixtures.log_helper import log + +import pytest from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker from fixtures.compare_fixtures import PgCompare -import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv @pytest.mark.parametrize( - 'rows,iters,workers', + "rows,iters,workers", [ # The test table is large enough (3-4 MB) that it doesn't fit in the compute node # cache, so the seqscans go to the page server. But small enough that it fits @@ -18,31 +19,34 @@ import pytest pytest.param(100000, 100, 0), # Also test with a larger table, with and without parallelism pytest.param(10000000, 1, 0), - pytest.param(10000000, 1, 4) - ]) + pytest.param(10000000, 1, 4), + ], +) def test_seqscans(neon_with_baseline: PgCompare, rows: int, iters: int, workers: int): env = neon_with_baseline with closing(env.pg.connect()) as conn: with conn.cursor() as cur: - cur.execute('create table t (i integer);') - cur.execute(f'insert into t values (generate_series(1,{rows}));') + cur.execute("create table t (i integer);") + cur.execute(f"insert into t values (generate_series(1,{rows}));") # Verify that the table is larger than shared_buffers - cur.execute(''' + cur.execute( + """ select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('t') as tbl_ize from pg_settings where name = 'shared_buffers' - ''') + """ + ) row = cur.fetchone() assert row is not None shared_buffers = row[0] table_size = row[1] log.info(f"shared_buffers is {shared_buffers}, table size {table_size}") assert int(shared_buffers) < int(table_size) - env.zenbenchmark.record("table_size", table_size, 'bytes', MetricReport.TEST_PARAM) + env.zenbenchmark.record("table_size", table_size, "bytes", MetricReport.TEST_PARAM) cur.execute(f"set max_parallel_workers_per_gather = {workers}") - with env.record_duration('run'): + with env.record_duration("run"): for i in range(iters): - cur.execute('select count(*) from t;') + cur.execute("select count(*) from t;") diff --git a/test_runner/performance/test_startup.py b/test_runner/performance/test_startup.py index 1cfd128e9b..e91b180154 100644 --- a/test_runner/performance/test_startup.py +++ b/test_runner/performance/test_startup.py @@ -1,7 +1,8 @@ -import pytest from contextlib import closing -from fixtures.neon_fixtures import NeonEnvBuilder + +import pytest from fixtures.benchmark_fixture import NeonBenchmarker +from fixtures.neon_fixtures import NeonEnvBuilder # This test sometimes runs for longer than the global 5 minute timeout. @@ -11,15 +12,15 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker env = neon_env_builder.init_start() # Start - env.neon_cli.create_branch('test_startup') + env.neon_cli.create_branch("test_startup") with zenbenchmark.record_duration("startup_time"): - pg = env.postgres.create_start('test_startup') + pg = env.postgres.create_start("test_startup") pg.safe_psql("select 1;") # Restart pg.stop_and_destroy() with zenbenchmark.record_duration("restart_time"): - pg.create_start('test_startup') + pg.create_start("test_startup") pg.safe_psql("select 1;") # Fill up @@ -28,8 +29,8 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker with closing(pg.connect()) as conn: with conn.cursor() as cur: for i in range(num_tables): - cur.execute(f'create table t_{i} (i integer);') - cur.execute(f'insert into t_{i} values (generate_series(1,{num_rows}));') + cur.execute(f"create table t_{i} (i integer);") + cur.execute(f"insert into t_{i} values (generate_series(1,{num_rows}));") # Read with zenbenchmark.record_duration("read_time"): @@ -42,7 +43,7 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker # Restart pg.stop_and_destroy() with zenbenchmark.record_duration("restart_with_data"): - pg.create_start('test_startup') + pg.create_start("test_startup") pg.safe_psql("select 1;") # Read diff --git a/test_runner/performance/test_wal_backpressure.py b/test_runner/performance/test_wal_backpressure.py index bbb5ddecab..03d5ba208a 100644 --- a/test_runner/performance/test_wal_backpressure.py +++ b/test_runner/performance/test_wal_backpressure.py @@ -10,8 +10,7 @@ from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare from fixtures.log_helper import log from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnvBuilder, PgBin from fixtures.utils import lsn_from_hex - -from performance.test_perf_pgbench import (get_durations_matrix, get_scales_matrix) +from performance.test_perf_pgbench import get_durations_matrix, get_scales_matrix @pytest.fixture(params=["vanilla", "neon_off", "neon_on"]) @@ -30,7 +29,9 @@ def pg_compare(request) -> PgCompare: return fixture else: - assert len(x) == 2, f"request param ({request.param}) should have a format of \ + assert ( + len(x) == 2 + ), f"request param ({request.param}) should have a format of \ `neon_{{safekeepers_enable_fsync}}`" # `NeonCompare` interface @@ -70,8 +71,7 @@ def start_heavy_write_workload(env: PgCompare, n_tables: int, scale: int, num_it with env.record_duration("run_duration"): threads = [ - threading.Thread(target=start_single_table_workload, args=(i, )) - for i in range(n_tables) + threading.Thread(target=start_single_table_workload, args=(i,)) for i in range(n_tables) ] for thread in threads: @@ -95,12 +95,14 @@ def test_heavy_write_workload(pg_compare: PgCompare, n_tables: int, scale: int, ) cur.execute(f"INSERT INTO t{i} (key) VALUES (0)") - workload_thread = threading.Thread(target=start_heavy_write_workload, - args=(env, n_tables, scale, num_iters)) + workload_thread = threading.Thread( + target=start_heavy_write_workload, args=(env, n_tables, scale, num_iters) + ) workload_thread.start() - record_thread = threading.Thread(target=record_lsn_write_lag, - args=(env, lambda: workload_thread.is_alive())) + record_thread = threading.Thread( + target=record_lsn_write_lag, args=(env, lambda: workload_thread.is_alive()) + ) record_thread.start() record_read_latency(env, lambda: workload_thread.is_alive(), "SELECT * from t0 where key = 0") @@ -110,14 +112,16 @@ def test_heavy_write_workload(pg_compare: PgCompare, n_tables: int, scale: int, def start_pgbench_simple_update_workload(env: PgCompare, duration: int): with env.record_duration("run_duration"): - env.pg_bin.run_capture([ - 'pgbench', - '-j10', - '-c10', - '-N', - f'-T{duration}', - env.pg.connstr(options="-csynchronous_commit=off") - ]) + env.pg_bin.run_capture( + [ + "pgbench", + "-j10", + "-c10", + "-N", + f"-T{duration}", + env.pg.connstr(options="-csynchronous_commit=off"), + ] + ) env.flush() @@ -128,20 +132,22 @@ def test_pgbench_simple_update_workload(pg_compare: PgCompare, scale: int, durat env = pg_compare # initialize pgbench tables - env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()]) + env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()]) env.flush() - workload_thread = threading.Thread(target=start_pgbench_simple_update_workload, - args=(env, duration)) + workload_thread = threading.Thread( + target=start_pgbench_simple_update_workload, args=(env, duration) + ) workload_thread.start() - record_thread = threading.Thread(target=record_lsn_write_lag, - args=(env, lambda: workload_thread.is_alive())) + record_thread = threading.Thread( + target=record_lsn_write_lag, args=(env, lambda: workload_thread.is_alive()) + ) record_thread.start() - record_read_latency(env, - lambda: workload_thread.is_alive(), - "SELECT * from pgbench_accounts where aid = 1") + record_read_latency( + env, lambda: workload_thread.is_alive(), "SELECT * from pgbench_accounts where aid = 1" + ) workload_thread.join() record_thread.join() @@ -150,13 +156,15 @@ def start_pgbench_intensive_initialization(env: PgCompare, scale: int, done_even with env.record_duration("run_duration"): # Needs to increase the statement timeout (default: 120s) because the # initialization step can be slow with a large scale. - env.pg_bin.run_capture([ - 'pgbench', - f'-s{scale}', - '-i', - '-Idtg', - env.pg.connstr(options='-cstatement_timeout=600s') - ]) + env.pg_bin.run_capture( + [ + "pgbench", + f"-s{scale}", + "-i", + "-Idtg", + env.pg.connstr(options="-cstatement_timeout=600s"), + ] + ) done_event.set() @@ -170,12 +178,14 @@ def test_pgbench_intensive_init_workload(pg_compare: PgCompare, scale: int): workload_done_event = threading.Event() - workload_thread = threading.Thread(target=start_pgbench_intensive_initialization, - args=(env, scale, workload_done_event)) + workload_thread = threading.Thread( + target=start_pgbench_intensive_initialization, args=(env, scale, workload_done_event) + ) workload_thread.start() - record_thread = threading.Thread(target=record_lsn_write_lag, - args=(env, lambda: not workload_done_event.is_set())) + record_thread = threading.Thread( + target=record_lsn_write_lag, args=(env, lambda: not workload_done_event.is_set()) + ) record_thread.start() record_read_latency(env, lambda: not workload_done_event.is_set(), "SELECT count(*) from foo") @@ -195,13 +205,15 @@ def record_lsn_write_lag(env: PgCompare, run_cond: Callable[[], bool], pool_inte cur.execute("CREATE EXTENSION neon") while run_cond(): - cur.execute(''' + cur.execute( + """ select pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn), pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn)), pg_current_wal_flush_lsn(), received_lsn from backpressure_lsns(); - ''') + """ + ) res = cur.fetchone() lsn_write_lags.append(res[0]) @@ -220,24 +232,29 @@ def record_lsn_write_lag(env: PgCompare, run_cond: Callable[[], bool], pool_inte time.sleep(pool_interval) - env.zenbenchmark.record("lsn_write_lag_max", - float(max(lsn_write_lags) / (1024**2)), - "MB", - MetricReport.LOWER_IS_BETTER) - env.zenbenchmark.record("lsn_write_lag_avg", - float(statistics.mean(lsn_write_lags) / (1024**2)), - "MB", - MetricReport.LOWER_IS_BETTER) - env.zenbenchmark.record("lsn_write_lag_stdev", - float(statistics.stdev(lsn_write_lags) / (1024**2)), - "MB", - MetricReport.LOWER_IS_BETTER) + env.zenbenchmark.record( + "lsn_write_lag_max", + float(max(lsn_write_lags) / (1024**2)), + "MB", + MetricReport.LOWER_IS_BETTER, + ) + env.zenbenchmark.record( + "lsn_write_lag_avg", + float(statistics.mean(lsn_write_lags) / (1024**2)), + "MB", + MetricReport.LOWER_IS_BETTER, + ) + env.zenbenchmark.record( + "lsn_write_lag_stdev", + float(statistics.stdev(lsn_write_lags) / (1024**2)), + "MB", + MetricReport.LOWER_IS_BETTER, + ) -def record_read_latency(env: PgCompare, - run_cond: Callable[[], bool], - read_query: str, - read_interval: float = 1.0): +def record_read_latency( + env: PgCompare, run_cond: Callable[[], bool], read_query: str, read_interval: float = 1.0 +): read_latencies = [] with env.pg.connect().cursor() as cur: @@ -256,15 +273,12 @@ def record_read_latency(env: PgCompare, time.sleep(read_interval) - env.zenbenchmark.record("read_latency_max", - max(read_latencies), - 's', - MetricReport.LOWER_IS_BETTER) - env.zenbenchmark.record("read_latency_avg", - statistics.mean(read_latencies), - 's', - MetricReport.LOWER_IS_BETTER) - env.zenbenchmark.record("read_latency_stdev", - statistics.stdev(read_latencies), - 's', - MetricReport.LOWER_IS_BETTER) + env.zenbenchmark.record( + "read_latency_max", max(read_latencies), "s", MetricReport.LOWER_IS_BETTER + ) + env.zenbenchmark.record( + "read_latency_avg", statistics.mean(read_latencies), "s", MetricReport.LOWER_IS_BETTER + ) + env.zenbenchmark.record( + "read_latency_stdev", statistics.stdev(read_latencies), "s", MetricReport.LOWER_IS_BETTER + ) diff --git a/test_runner/performance/test_write_amplification.py b/test_runner/performance/test_write_amplification.py index 1d729fd78f..7aab469387 100644 --- a/test_runner/performance/test_write_amplification.py +++ b/test_runner/performance/test_write_amplification.py @@ -12,10 +12,11 @@ # Amplification problem at its finest. import os from contextlib import closing + from fixtures.benchmark_fixture import MetricReport -from fixtures.neon_fixtures import NeonEnv -from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare +from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv def test_write_amplification(neon_with_baseline: PgCompare): @@ -23,18 +24,20 @@ def test_write_amplification(neon_with_baseline: PgCompare): with closing(env.pg.connect()) as conn: with conn.cursor() as cur: - with env.record_pageserver_writes('pageserver_writes'): - with env.record_duration('run'): + with env.record_pageserver_writes("pageserver_writes"): + with env.record_duration("run"): # NOTE: Because each iteration updates every table already created, # the runtime and write amplification is O(n^2), where n is the # number of iterations. for i in range(25): - cur.execute(f''' + cur.execute( + f""" CREATE TABLE tbl{i} AS SELECT g as i, 'long string to consume some space' || g as t FROM generate_series(1, 100000) g - ''') + """ + ) cur.execute(f"create index on tbl{i} (i);") for j in range(1, i): cur.execute(f"delete from tbl{j} where i = {i}") diff --git a/test_runner/pg_clients/test_pg_clients.py b/test_runner/pg_clients/test_pg_clients.py index a117616358..f91a2adf7d 100644 --- a/test_runner/pg_clients/test_pg_clients.py +++ b/test_runner/pg_clients/test_pg_clients.py @@ -18,10 +18,12 @@ from fixtures.utils import subprocess_capture "python/asyncpg", pytest.param( "python/pg8000", # See https://github.com/neondatabase/neon/pull/2008#discussion_r912264281 - marks=pytest.mark.xfail(reason="Handles SSL in incompatible with Neon way")), + marks=pytest.mark.xfail(reason="Handles SSL in incompatible with Neon way"), + ), pytest.param( "swift/PostgresClientKit", # See https://github.com/neondatabase/neon/pull/2008#discussion_r911896592 - marks=pytest.mark.xfail(reason="Neither SNI nor parameters is supported")), + marks=pytest.mark.xfail(reason="Neither SNI nor parameters is supported"), + ), "typescript/postgresql-client", ], ) @@ -31,12 +33,14 @@ def test_pg_clients(test_output_dir: Path, remote_pg: RemotePostgres, client: st env_file = None with NamedTemporaryFile(mode="w", delete=False) as f: env_file = f.name - f.write(f""" + f.write( + f""" NEON_HOST={conn_options["host"]} NEON_DATABASE={conn_options["dbname"]} NEON_USER={conn_options["user"]} NEON_PASSWORD={conn_options["password"]} - """) + """ + ) image_tag = client.lower() docker_bin = shutil.which("docker") diff --git a/test_runner/test_broken.py b/test_runner/test_broken.py index 3960546689..0281f4f48b 100644 --- a/test_runner/test_broken.py +++ b/test_runner/test_broken.py @@ -1,8 +1,9 @@ -import pytest import os -from fixtures.neon_fixtures import NeonEnv +import pytest from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv + """ Use this test to see what happens when tests fail. @@ -13,8 +14,9 @@ Set the environment variable RUN_BROKEN to see this test run (and fail, and hopefully not leave any server processes behind). """ -run_broken = pytest.mark.skipif(os.environ.get('RUN_BROKEN') is None, - reason="only used for testing the fixtures") +run_broken = pytest.mark.skipif( + os.environ.get("RUN_BROKEN") is None, reason="only used for testing the fixtures" +) @run_broken @@ -23,7 +25,7 @@ def test_broken(neon_simple_env: NeonEnv, pg_bin): env.neon_cli.create_branch("test_broken", "empty") env.postgres.create_start("test_broken") - log.info('postgres is running') + log.info("postgres is running") - log.info('THIS NEXT COMMAND WILL FAIL:') - pg_bin.run('pgbench -i_am_a_broken_test'.split()) + log.info("THIS NEXT COMMAND WILL FAIL:") + pg_bin.run("pgbench -i_am_a_broken_test".split())