From 37cd662ab20f7040237b422eae2f0f3e1612cc1f Mon Sep 17 00:00:00 2001 From: Eric Seppanen Date: Sun, 2 May 2021 10:49:51 -0700 Subject: [PATCH] add pytest integration tests Use pytest to manage background services, paths, and environment variables. Benefits: - Tests are a little easier to write. - Cleanup is more reliable. You can CTRL-C a test and it will still shut down gracefully. If you manually start a conflicting process, the test fixtures will detect this and abort at startup. - Don't need to worry about remembering '--test-threads=1' - Output of sub-processes can be captured to files. - Test fixtures configure everything to operate under a single test output directory, making it easier to capture logs in CI. - Detects all the necessary paths if run from the git root, but can also run from arbitrary paths by setting environment variables. There is also a deliberately broken test (test_broken.py) that can be used to test whether the test fixtures properly clean up after themselves. It won't run by default; the comment at the top explains how to enable it. 
--- .gitignore | 2 + test_runner/README.md | 77 ++++++ test_runner/fixtures/__init__.py | 0 test_runner/fixtures/utils.py | 53 ++++ test_runner/fixtures/zenith_fixtures.py | 326 ++++++++++++++++++++++++ test_runner/test_broken.py | 34 +++ test_runner/test_pg_regress.py | 61 +++++ test_runner/test_pgbench.py | 10 + 8 files changed, 563 insertions(+) create mode 100644 test_runner/README.md create mode 100644 test_runner/fixtures/__init__.py create mode 100644 test_runner/fixtures/utils.py create mode 100644 test_runner/fixtures/zenith_fixtures.py create mode 100644 test_runner/test_broken.py create mode 100644 test_runner/test_pg_regress.py create mode 100644 test_runner/test_pgbench.py diff --git a/.gitignore b/.gitignore index 8d2f8277b5..da1369ad24 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,7 @@ /tmp_check /tmp_install /tmp_check_cli +__pycache__/ +test_output/ .vscode .zenith diff --git a/test_runner/README.md b/test_runner/README.md new file mode 100644 index 0000000000..27c7c499df --- /dev/null +++ b/test_runner/README.md @@ -0,0 +1,77 @@ +## Zenith test runner + +This directory contains integration tests. + +Prerequisites: +- pytest, psycopg2 + Install using something like this: + - `pip3 install pytest psycopg2` (Debian or Ubuntu) + +- an already compiled zenith + postgres tree + - See the root README.md for details + + +### Running the tests + +Because pytest will search all subdirectories for tests, it's easiest to +run the tests from within the `test_runner` directory. + +Test state (postgres data, pageserver state, and log files) will +be stored under a directory `test_output`. + +You can run all the tests with: + +`pytest` + +If you want to run all the tests in a particular file: + +`pytest test_pgbench.py` + +If you want to run all tests that have the string "bench" in their names: + +`pytest -k bench` + +Useful environment variables: + +`ZENITH_BIN`: The directory where zenith binaries can be found. 
+`POSTGRES_BIN`: The postgres install directory (the one that contains `bin/postgres`). +`TEST_OUTPUT`: Set the directory where test state and test output files +should go. +`TEST_SHARED_FIXTURES`: Try to re-use a single postgres and pageserver +for all the tests. + +Let stdout and stderr go to the terminal instead of capturing them: +`pytest -s ...` +(Note many tests capture subprocess outputs separately, so this may not +show much.) + +Exit after the first test failure: +`pytest -x ...` +(there are many more pytest options; run `pytest -h` to see them.) + + +### Building new tests + +The tests make heavy use of pytest fixtures. You can read about how they work here: https://docs.pytest.org/en/stable/fixture.html + +Essentially, this means that each time you see a fixture named as an input parameter, the fixture function with that name will be run and its result passed as a parameter to the test function. + +So this code: +``` +def test_something(zenith_cli, pg_bin): + pass +``` + +... will run the fixtures called `zenith_cli` and `pg_bin` and deliver those results to the test function. + +Fixtures can't be imported using the normal python syntax. Instead, use this: +``` +pytest_plugins = ("fixtures.something") +``` +That will make all the fixtures in the `fixtures/something.py` file available. + +Anything that's likely to be used in multiple tests should be built into a fixture. + +Note that fixtures can clean up after themselves if they use the `yield` syntax. +Cleanup will happen even if the test fails (raises an unhandled exception). +Python destructors, e.g. `__del__()`, aren't recommended for cleanup. 
diff --git a/test_runner/fixtures/__init__.py b/test_runner/fixtures/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py
new file mode 100644
index 0000000000..70d8db3769
--- /dev/null
+++ b/test_runner/fixtures/utils.py
@@ -0,0 +1,53 @@
+
+import os
+import subprocess
+
+def get_self_dir():
+    """ Get the path to the directory where this script lives. """
+    return os.path.dirname(os.path.abspath(__file__))
+
+
+def mkdir_if_needed(path):
+    """ Create a directory if it doesn't already exist
+
+    Note this won't try to create intermediate directories.
+    """
+    if os.path.exists(path):
+        assert os.path.isdir(path)
+        return
+    os.mkdir(path)
+
+
+def subprocess_capture(capture_dir, cmd, **kwargs):
+    """ Run a process and capture its output
+
+    Output will go to files named "cmd_NNN.stdout" and "cmd_NNN.stderr"
+    where "cmd" is the name of the program and NNN is an incrementing
+    counter.
+
+    If those files already exist, we will overwrite them.
+    """
+    assert isinstance(cmd, list)
+    base = os.path.basename(cmd[0]) + '_{}'.format(global_counter())
+    basepath = os.path.join(capture_dir, base)
+    stdout_filename = basepath + '.stdout'
+    stderr_filename = basepath + '.stderr'
+
+    with open(stdout_filename, 'w') as stdout_f:
+        with open(stderr_filename, 'w') as stderr_f:
+            print('(capturing output to "{}.stdout")'.format(base))
+            subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)
+
+
+_global_counter = 0
+
+
+def global_counter():
+    """ A really dumb global counter.
+
+    This is useful for giving output files a unique number, so if we run the
+    same command multiple times we can keep their output separate.
+    """
+    global _global_counter
+    _global_counter += 1
+    return _global_counter
diff --git a/test_runner/fixtures/zenith_fixtures.py b/test_runner/fixtures/zenith_fixtures.py
new file mode 100644
index 0000000000..a31f6c3509
--- /dev/null
+++ b/test_runner/fixtures/zenith_fixtures.py
@@ -0,0 +1,326 @@
+import os
+import pytest
+import shutil
+import subprocess
+import sys
+from fixtures.utils import (get_self_dir, mkdir_if_needed,
+                            subprocess_capture, global_counter)
+
+"""
+This file contains pytest fixtures. A fixture is a test resource that can be
+summoned by placing its name in the test's arguments.
+
+A fixture is created with the decorator @zenfixture, which is a wrapper around
+the standard pytest.fixture with some extra behavior.
+
+There are several environment variables that can control the running of tests:
+ZENITH_BIN, POSTGRES_BIN, etc. See README.md for more information.
+
+To use fixtures in a test file, add this line of code:
+
+    pytest_plugins = ("fixtures.zenith_fixtures")
+
+Don't import functions from this file, or pytest will emit warnings. Instead
+put directly-importable functions into utils.py or another separate file.
+"""
+
+DEFAULT_OUTPUT_DIR = 'test_output'
+DEFAULT_POSTGRES_DIR = 'tmp_install'
+
+
+def determine_scope(fixture_name, config):
+    return 'session'
+
+
+def zenfixture(func):
+    """ This is a python decorator for fixtures with a flexible scope.
+
+    By default every test function will set up and tear down a new
+    database. In pytest, this is called fixtures "function" scope.
+
+    If the environment variable TEST_SHARED_FIXTURES is set, then all
+    tests will share the same database. State, logs, etc. will be
+    stored in a directory called "shared".
+
+    """
+    if os.environ.get('TEST_SHARED_FIXTURES') is None:
+        scope = 'function'
+    else:
+        scope = 'session'
+    return pytest.fixture(func, scope=scope)
+
+
+@pytest.fixture(autouse=True, scope='session')
+def safety_check():
+    """ Ensure that no unwanted daemons are running before we start testing. """
+    cmd = ['pgrep', '-c', 'pageserver|postgres']
+    result = subprocess.run(cmd)
+    if result.returncode == 0:
+        # returncode of 0 means it found something.
+        # This is bad; we don't want any of those processes polluting the
+        # result of the test.
+        raise Exception('found interfering processes running')
+
+
+class ZenithCli:
+    """ An object representing the CLI binary named "zenith".
+
+    We also store an environment that will tell the CLI to operate
+    on a particular ZENITH_REPO_DIR.
+    """
+
+    def __init__(self, binpath, repo_dir, pg_distrib_dir):
+        assert os.path.isdir(binpath)
+        self.binpath = binpath
+        self.bin_zenith = os.path.join(binpath, 'zenith')
+        self.env = os.environ.copy()
+        self.env['ZENITH_REPO_DIR'] = repo_dir
+        self.env['POSTGRES_BIN'] = pg_distrib_dir
+
+    def run(self, arguments):
+        """ Run "zenith" with the specified arguments.
+
+        arguments must be in list form, e.g. ['pg', 'create']
+        """
+        assert isinstance(arguments, list)
+        args = [self.bin_zenith] + arguments
+        print('Running command "{}"'.format(' '.join(args)))
+        subprocess.run(args, env=self.env, check=True)
+
+    def run_init(self):
+        """ Run the "zenith init " command. """
+        self.run(['init'])
+
+
+@zenfixture
+def zenith_cli(zenith_binpath, repo_dir, pg_distrib_dir):
+    return ZenithCli(zenith_binpath, repo_dir, pg_distrib_dir)
+
+
+class ZenithPageserver:
+    """ An object representing a running pageserver. """
+
+    def __init__(self, zenith_cli):
+        self.zenith_cli = zenith_cli
+        self.running = False
+
+    def start(self):
+        self.zenith_cli.run(['pageserver', 'start'])
+        self.running = True
+
+    def stop(self):
+        # FIXME: this is a todo!() in the zenith cli code
+        if self.running:
+            try:
+                subprocess.run(['killall', 'pageserver'])
+            except FileNotFoundError:
+                print(
+                    'WARNING: Failed to terminate pageserver, "killall" not found', file=sys.stderr)
+
+
+@zenfixture
+def pageserver(zenith_cli):
+    ps = ZenithPageserver(zenith_cli)
+    yield ps
+    # After the yield comes any cleanup code we need.
+    print('Starting pageserver cleanup')
+    ps.stop()
+
+
+class Postgres:
+    """ An object representing a running postgres instance. """
+
+    def __init__(self, zenith_cli):
+        self.zenith_cli = zenith_cli
+        self.running = False
+        # path to conf is /pgdatadirs/pg1/postgresql.conf
+
+    def create_start(self):
+        """ create the pg data directory, and start the server """
+        self.zenith_cli.run(['pg', 'create'])
+        # FIXME: where did the name pg1 come from?
+        self.zenith_cli.run(['pg', 'start', 'pg1'])
+        self.running = True
+
+    def stop(self):
+        if self.running:
+            self.zenith_cli.run(['pg', 'stop', 'pg1'])
+
+
+@zenfixture
+def postgres(zenith_cli):
+    pg = Postgres(zenith_cli)
+    yield pg
+    # After the yield comes any cleanup code we need.
+    print('Starting postgres cleanup')
+    pg.stop()
+
+
+class PgBin:
+    """ A helper class for executing postgres binaries """
+
+    def __init__(self, log_dir, pg_distrib_dir):
+        self.log_dir = log_dir
+        self.pg_install_path = pg_distrib_dir
+        self.pg_bin_path = os.path.join(self.pg_install_path, 'bin')
+        self.env = os.environ.copy()
+        self.env['LD_LIBRARY_PATH'] = os.path.join(self.pg_install_path, 'lib')
+
+    def _fixpath(self, command):
+        if '/' not in command[0]:
+            command[0] = os.path.join(self.pg_bin_path, command[0])
+
+    def _build_env(self, env_add):
+        if env_add is None:
+            return self.env
+        env = self.env.copy()
+        env.update(env_add)
+        return env
+
+    def run(self, command, env=None, cwd=None):
+        """ Run one of the postgres binaries.
+
+        The command should be in list form, e.g. ['pgbench', '-p', '55432']
+
+        All the necessary environment variables will be set.
+
+        If the first argument (the command name) doesn't include a path (no '/'
+        characters present), then it will be edited to include the correct path.
+
+        If you want stdout/stderr captured to files, use `run_capture` instead.
+
+        """
+        self._fixpath(command)
+        print('Running command "{}"'.format(' '.join(command)))
+        env = self._build_env(env)
+        subprocess.run(command, env=env, cwd=cwd, check=True)
+
+    def run_capture(self, command, env=None, cwd=None):
+        """ Run one of the postgres binaries, with stderr and stdout redirected to a file.
+
+        This is just like `run`, but for chatty programs.
+        """
+        self._fixpath(command)
+        print('Running command "{}"'.format(' '.join(command)))
+        env = self._build_env(env)
+        subprocess_capture(self.log_dir, command, env=env, cwd=cwd, check=True)
+
+
+@zenfixture
+def pg_bin(test_output_dir, pg_distrib_dir):
+    return PgBin(test_output_dir, pg_distrib_dir)
+
+
+@zenfixture
+def base_dir():
+    """ find the base directory (currently this is the git root) """
+    base_dir = os.path.normpath(os.path.join(get_self_dir(), '../..'))
+    print('base_dir is', base_dir)
+    return base_dir
+
+
+@zenfixture
+def top_output_dir(base_dir):
+    """ Compute the top-level directory for all tests. """
+    env_test_output = os.environ.get('TEST_OUTPUT')
+    if env_test_output is not None:
+        output_dir = env_test_output
+    else:
+        output_dir = os.path.join(base_dir, DEFAULT_OUTPUT_DIR)
+    mkdir_if_needed(output_dir)
+    return output_dir
+
+
+@zenfixture
+def test_output_dir(request, top_output_dir):
+    """ Compute the working directory for an individual test. """
+    if os.environ.get('TEST_SHARED_FIXTURES') is None:
+        # one directory per test
+        test_name = request.node.name
+    else:
+        # We're running shared fixtures. Share a single directory.
+        test_name = 'shared'
+
+    test_output_dir = os.path.join(top_output_dir, test_name)
+    print('test_output_dir is', test_output_dir)
+    shutil.rmtree(test_output_dir, ignore_errors=True)
+    mkdir_if_needed(test_output_dir)
+    return test_output_dir
+
+
+@zenfixture
+def repo_dir(request, test_output_dir):
+    """ Compute the test repo_dir
+
+    "repo_dir" is the place where all of the pageserver files will go.
+    It doesn't have anything to do with the git repo.
+    """
+    repo_dir = os.path.join(test_output_dir, 'repo')
+    return repo_dir
+
+
+@zenfixture
+def zenith_binpath(base_dir):
+    """ find the zenith binaries """
+    env_zenith_bin = os.environ.get('ZENITH_BIN')
+    if env_zenith_bin:
+        zenith_dir = env_zenith_bin
+    else:
+        zenith_dir = os.path.join(base_dir, 'target/debug')
+    if not os.path.exists(os.path.join(zenith_dir, 'pageserver')):
+        raise Exception('zenith binaries not found at "{}"'.format(zenith_dir))
+    return zenith_dir
+
+
+@zenfixture
+def pg_distrib_dir(base_dir):
+    """ find the postgres install """
+    env_postgres_bin = os.environ.get('POSTGRES_BIN')
+    if env_postgres_bin:
+        pg_dir = env_postgres_bin
+    else:
+        pg_dir = os.path.normpath(os.path.join(base_dir, DEFAULT_POSTGRES_DIR))
+    print('postgres dir is', pg_dir)
+    if not os.path.exists(os.path.join(pg_dir, 'bin/postgres')):
+        raise Exception('postgres not found at "{}"'.format(pg_dir))
+    return pg_dir
+
+
+class SimpleTest:
+    """ A fixture object that contains the things we need for a simple test.
+
+    This is an object with common fixture members:
+        zenith_cli
+        pageserver
+        postgres
+        pg_bin
+
+    Example:
+
+    pytest_plugins = ("fixtures.zenith_fixtures")
+    def test_something(zen_simple):
+        zen_simple.pg_bin.run(['pgbench', '-i'])
+
+    """
+
+    def __init__(self, zenith_cli, pageserver, postgres, pg_bin):
+        self.zenith_cli = zenith_cli
+        self.pageserver = pageserver
+        self.postgres = postgres
+        self.pg_bin = pg_bin
+
+    def start(self):
+        """ Start a pageserver and postgres. """
+        self.zenith_cli.run_init()
+        self.pageserver.start()
+        print('pageserver is running')
+
+        self.postgres.create_start()
+        print('postgres is running')
+
+
+@zenfixture
+def zen_simple(zenith_cli, pageserver, postgres, pg_bin):
+    simple = SimpleTest(zenith_cli, pageserver, postgres, pg_bin)
+    simple.start()
+    return simple
diff --git a/test_runner/test_broken.py b/test_runner/test_broken.py
new file mode 100644
index 0000000000..fda9fe0b23
--- /dev/null
+++ b/test_runner/test_broken.py
@@ -0,0 +1,34 @@
+import pytest
+import os
+
+pytest_plugins = ("fixtures.zenith_fixtures")
+
+"""
+
+Use this test to see what happens when tests fail.
+
+We should be able to clean up after ourselves, including stopping any
+postgres or pageserver processes.
+
+Set the environment variable RUN_BROKEN to see this test run (and fail,
+and hopefully not leave any server processes behind).
+
+"""
+
+
+run_broken = pytest.mark.skipif(
+    os.environ.get('RUN_BROKEN') is None,
+    reason="only used for testing the fixtures"
+)
+
+@run_broken
+def test_broken(zenith_cli, pageserver, postgres, pg_bin):
+    zenith_cli.run_init()
+    pageserver.start()
+    print('pageserver is running')
+
+    postgres.create_start()
+    print('postgres is running')
+
+    print('THIS NEXT COMMAND WILL FAIL:')
+    pg_bin.run('pgbench -i_am_a_broken_test'.split())
diff --git a/test_runner/test_pg_regress.py b/test_runner/test_pg_regress.py
new file mode 100644
index 0000000000..ce0a7d4d0a
--- /dev/null
+++ b/test_runner/test_pg_regress.py
@@ -0,0 +1,61 @@
+import pytest
+from fixtures.utils import mkdir_if_needed
+import getpass
+import os
+import psycopg2
+
+pytest_plugins = ("fixtures.zenith_fixtures")
+
+# FIXME: put host + port in a fixture
+HOST = 'localhost'
+PORT = 55432
+
+
+def test_pg_regress(zen_simple, test_output_dir, pg_distrib_dir, base_dir):
+
+    # Connect to postgres and create a database called "regression".
+    username = getpass.getuser()
+    conn_str = 'host={} port={} dbname=postgres user={}'.format(
+        HOST, PORT, username)
+    print('conn_str is', conn_str)
+    pg_conn = psycopg2.connect(conn_str)
+    pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
+    cur = pg_conn.cursor()
+    cur.execute('CREATE DATABASE regression')
+    pg_conn.close()
+
+    # Create some local directories for pg_regress to run in.
+    runpath = os.path.join(test_output_dir, 'regress')
+    mkdir_if_needed(runpath)
+    mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
+
+    # Compute all the file locations that pg_regress will need.
+    build_path = os.path.join(
+        pg_distrib_dir, 'build/src/test/regress')
+    src_path = os.path.join(
+        base_dir, 'vendor/postgres/src/test/regress')
+    bindir = os.path.join(pg_distrib_dir, 'bin')
+    schedule = os.path.join(src_path, 'parallel_schedule')
+    pg_regress = os.path.join(build_path, 'pg_regress')
+
+    pg_regress_command = [
+        pg_regress,
+        '--bindir=""',
+        '--use-existing',
+        '--bindir={}'.format(bindir),
+        '--dlpath={}'.format(build_path),
+        '--schedule={}'.format(schedule),
+        '--inputdir={}'.format(src_path),
+    ]
+
+    env = {
+        'PGPORT': str(PORT),
+        'PGUSER': username,
+        'PGHOST': HOST,
+    }
+
+    # Run the command.
+    # We don't capture the output. It's not too chatty, and it always
+    # logs the exact same data to `regression.out` anyway.
+
+    zen_simple.pg_bin.run(pg_regress_command, env=env, cwd=runpath)
diff --git a/test_runner/test_pgbench.py b/test_runner/test_pgbench.py
new file mode 100644
index 0000000000..d04e7077e3
--- /dev/null
+++ b/test_runner/test_pgbench.py
@@ -0,0 +1,10 @@
+import pytest
+
+pytest_plugins = ("fixtures.zenith_fixtures")
+
+
+def test_pgbench(zen_simple):
+    zen_simple.pg_bin.run_capture(
+        'pgbench -h localhost -p 55432 -i postgres'.split())
+    zen_simple.pg_bin.run_capture(
+        'pgbench -h localhost -p 55432 -c 10 -T 100 -P 1 -M prepared postgres'.split())