diff --git a/.gitignore b/.gitignore
index 8d2f8277b5..da1369ad24 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,5 +2,7 @@
/tmp_check
/tmp_install
/tmp_check_cli
+__pycache__/
+test_output/
.vscode
.zenith
diff --git a/test_runner/README.md b/test_runner/README.md
new file mode 100644
index 0000000000..27c7c499df
--- /dev/null
+++ b/test_runner/README.md
@@ -0,0 +1,77 @@
+## Zenith test runner
+
+This directory contains integration tests.
+
+Prerequisites:
+- pytest, psycopg2
+ Install using something like this:
+ - `pip3 install pytest psycopg2` (Debian or Ubuntu)
+
+- an already compiled zenith + postgres tree
+ - See the root README.md for details
+
+
+### Running the tests
+
+Because pytest will search all subdirectories for tests, it's easiest to
+run the tests from within the `test_runner` directory.
+
+Test state (postgres data, pageserver state, and log files) will be stored
+under a directory `test_output` at the top of the source tree (see `TEST_OUTPUT` below).
+
+You can run all the tests with:
+
+`pytest`
+
+If you want to run all the tests in a particular file:
+
+`pytest test_pgbench.py`
+
+If you want to run all tests that have the string "bench" in their names:
+
+`pytest -k bench`
+
+Useful environment variables:
+
+- `ZENITH_BIN`: The directory where zenith binaries can be found
+  (default: `target/debug` in the source tree).
+- `POSTGRES_BIN`: The postgres install directory, i.e. the directory that
+  contains `bin/postgres` (default: `tmp_install` in the source tree).
+- `TEST_OUTPUT`: Set the directory where test state and test output files
+  should go.
+- `TEST_SHARED_FIXTURES`: Try to re-use a single postgres and pageserver
+  for all the tests.
+
+Let stdout and stderr go to the terminal instead of capturing them:
+`pytest -s ...`
+(Note many tests capture subprocess outputs separately, so this may not
+show much.)
+
+Exit after the first test failure:
+`pytest -x ...`
+(there are many more pytest options; run `pytest -h` to see them.)
+
+
+### Building new tests
+
+The tests make heavy use of pytest fixtures. You can read about how they work here: https://docs.pytest.org/en/stable/fixture.html
+
+Essentially, this means that each time you see a fixture named as an input parameter, the fixture function with that name will be run and its result passed as that parameter to the test function.
+
+So this code:
+```
+def test_something(zenith_cli, pg_bin):
+ pass
+```
+
+... will run the fixtures called `zenith_cli` and `pg_bin` and deliver those results to the test function.
+
+Fixtures can't be imported using the normal python syntax. Instead, use this:
+```
+pytest_plugins = ("fixtures.something")
+```
+That will make all the fixtures in the `fixtures/something.py` file available.
+
+Anything that's likely to be used in multiple tests should be built into a fixture.
+
+Note that fixtures can clean up after themselves if they use the `yield` syntax.
+Cleanup will happen even if the test fails (raises an unhandled exception).
+Python destructors, e.g. `__del__()`, aren't recommended for cleanup.
diff --git a/test_runner/fixtures/__init__.py b/test_runner/fixtures/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py
new file mode 100644
index 0000000000..70d8db3769
--- /dev/null
+++ b/test_runner/fixtures/utils.py
@@ -0,0 +1,53 @@
+
+import os
+import subprocess
+
+def get_self_dir():
+    """ Return the absolute path of the directory containing this file. """
+    this_file = os.path.abspath(__file__)
+    return os.path.dirname(this_file)
+
+
+def mkdir_if_needed(path):
+    """ Create the directory `path` unless it already exists.
+
+    Note this won't try to create intermediate directories; the parent
+    of `path` must already exist.
+
+    Raises FileExistsError if `path` exists but is not a directory.
+    """
+    try:
+        os.mkdir(path)
+    except FileExistsError:
+        # Something is already there. That is fine as long as it is a
+        # directory; anything else is a real error. Attempting the mkdir
+        # first avoids the race between an existence check and the creation,
+        # and unlike an `assert` this check survives `python -O`.
+        if not os.path.isdir(path):
+            raise
+
+
+def subprocess_capture(capture_dir, cmd, **kwargs):
+    """ Run a process and capture its output
+
+    Output will go to files named "cmd_NNN.stdout" and "cmd_NNN.stderr"
+    in `capture_dir`, where "cmd" is the name of the program and NNN is an
+    incrementing counter.
+
+    If those files already exist, we will overwrite them.
+
+    `cmd` must be a list. Any extra keyword arguments (e.g. `env`, `cwd`,
+    `check`) are passed through to subprocess.run().
+
+    Returns the subprocess.CompletedProcess, so that callers that don't pass
+    check=True can still inspect the return code.
+    """
+    assert isinstance(cmd, list)
+    base = '{}_{}'.format(os.path.basename(cmd[0]), global_counter())
+    basepath = os.path.join(capture_dir, base)
+    stdout_filename = basepath + '.stdout'
+    stderr_filename = basepath + '.stderr'
+
+    with open(stdout_filename, 'w') as stdout_f, \
+            open(stderr_filename, 'w') as stderr_f:
+        print('(capturing output to "{}.stdout")'.format(base))
+        return subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)
+
+
+# Last value handed out by global_counter(); module-wide state.
+_global_counter = 0
+
+
+def global_counter():
+    """ Return the next value of a module-wide counter (1, 2, 3, ...).
+
+    Handy for giving output files a unique number, so that repeated runs of
+    the same command don't overwrite each other's output.
+    """
+    global _global_counter
+    _global_counter = _global_counter + 1
+    return _global_counter
diff --git a/test_runner/fixtures/zenith_fixtures.py b/test_runner/fixtures/zenith_fixtures.py
new file mode 100644
index 0000000000..a31f6c3509
--- /dev/null
+++ b/test_runner/fixtures/zenith_fixtures.py
@@ -0,0 +1,326 @@
+import os
+import pytest
+import shutil
+import subprocess
+import sys
+from fixtures.utils import (get_self_dir, mkdir_if_needed,
+ subprocess_capture, global_counter)
+
+"""
+This file contains pytest fixtures. A fixture is a test resource that can be
+summoned by placing its name in the test's arguments.
+
+A fixture is created with the decorator @zenfixture, which is a wrapper around
+the standard pytest.fixture with some extra behavior.
+
+There are several environment variables that can control the running of tests:
+ZENITH_BIN, POSTGRES_BIN, etc. See README.md for more information.
+
+To use fixtures in a test file, add this line of code:
+
+ pytest_plugins = ("fixtures.zenith_fixtures")
+
+Don't import functions from this file, or pytest will emit warnings. Instead
+put directly-importable functions into utils.py or another separate file.
+"""
+
+# Directory (joined onto base_dir) for test output when TEST_OUTPUT is not set.
+DEFAULT_OUTPUT_DIR = 'test_output'
+# Directory (joined onto base_dir) of the postgres install when POSTGRES_BIN is not set.
+DEFAULT_POSTGRES_DIR = 'tmp_install'
+
+
+def determine_scope(fixture_name, config):
+    """ Return the fixture scope to use; currently always 'session'.
+
+    Both arguments are ignored.
+
+    NOTE(review): nothing in the visible code references this function --
+    confirm whether it is still needed.
+    """
+    return 'session'
+
+
+def zenfixture(func):
+    """ This is a python decorator for fixtures with a flexible scope.
+
+    By default every test function will set up and tear down a new
+    database. In pytest, this is called fixtures "function" scope.
+
+    If the environment variable TEST_SHARED_FIXTURES is set (to any value,
+    even an empty one), then all tests will share the same database, i.e.
+    the fixtures get "session" scope. State, logs, etc. will be stored in a
+    directory called "shared".
+
+    The environment is inspected at the moment the decorator is applied.
+    """
+    shared = os.environ.get('TEST_SHARED_FIXTURES') is not None
+    scope = 'session' if shared else 'function'
+    # Use the documented decorator-factory form, pytest.fixture(...)(func).
+    # Passing the function positionally together with keyword arguments is
+    # not accepted by every pytest version.
+    return pytest.fixture(scope=scope)(func)
+
+
+@pytest.fixture(autouse=True, scope='session')
+def safety_check():
+    """ Refuse to start testing while stray server processes are running.
+
+    Leftover pageserver or postgres processes would pollute the results of
+    the tests, so bail out if pgrep finds any.
+    """
+    pgrep = subprocess.run(['pgrep', '-c', 'pageserver|postgres'])
+    # A returncode of 0 means pgrep found something; anything else is fine.
+    if pgrep.returncode != 0:
+        return
+    raise Exception('found interfering processes running')
+
+
+class ZenithCli:
+    """ An object representing the CLI binary named "zenith".
+
+    We also store an environment that will tell the CLI to operate
+    on a particular ZENITH_REPO_DIR.
+    """
+
+    def __init__(self, binpath, repo_dir, pg_distrib_dir):
+        """ binpath is the directory holding the "zenith" binary; repo_dir and
+        pg_distrib_dir are exported to the CLI as ZENITH_REPO_DIR and
+        POSTGRES_BIN.
+        """
+        assert os.path.isdir(binpath)
+        self.binpath = binpath
+        self.bin_zenith = os.path.join(binpath, 'zenith')
+        # Private copy of the environment, so os.environ is left untouched.
+        self.env = os.environ.copy()
+        self.env['ZENITH_REPO_DIR'] = repo_dir
+        self.env['POSTGRES_BIN'] = pg_distrib_dir
+
+    def run(self, arguments):
+        """ Run "zenith" with the specified arguments.
+
+        arguments must be in list form, e.g. ['pg', 'create']
+
+        Raises subprocess.CalledProcessError if the command exits non-zero.
+        """
+        assert isinstance(arguments, list)
+        args = [self.bin_zenith] + arguments
+        print('Running command "{}"'.format(' '.join(args)))
+        subprocess.run(args, env=self.env, check=True)
+
+    def run_init(self):
+        """ Run the "zenith init" command. """
+        self.run(['init'])
+
+
+@zenfixture
+def zenith_cli(zenith_binpath, repo_dir, pg_distrib_dir):
+    """ Fixture: a ZenithCli built from the located binaries and repo dir. """
+    cli = ZenithCli(zenith_binpath, repo_dir, pg_distrib_dir)
+    return cli
+
+
+class ZenithPageserver:
+    """ An object representing a running pageserver. """
+
+    def __init__(self, zenith_cli):
+        self.zenith_cli = zenith_cli
+        # True between a successful start() and the next successful stop().
+        self.running = False
+
+    def start(self):
+        """ Start the pageserver with "zenith pageserver start". """
+        self.zenith_cli.run(['pageserver', 'start'])
+        self.running = True
+
+    def stop(self):
+        """ Stop the pageserver if start() was called; otherwise do nothing.
+
+        Note that this uses "killall pageserver", so it is not limited to
+        the process that start() launched.
+        """
+        # FIXME: this is a todo!() in the zenith cli code
+        if not self.running:
+            return
+        try:
+            subprocess.run(['killall', 'pageserver'])
+        except FileNotFoundError:
+            print(
+                'WARNING: Failed to terminate pageserver, "killall" not found', file=sys.stderr)
+        else:
+            # Don't run killall again on a repeated stop().
+            self.running = False
+
+
+@zenfixture
+def pageserver(zenith_cli):
+    """ Fixture: a ZenithPageserver (not started yet) that is stopped at teardown. """
+    server = ZenithPageserver(zenith_cli)
+    yield server
+    # Teardown: everything after the yield is cleanup code.
+    print('Starting pageserver cleanup')
+    server.stop()
+
+
+class Postgres:
+    """ An object representing a running postgres server. """
+
+    def __init__(self, zenith_cli):
+        self.zenith_cli = zenith_cli
+        # True between a successful create_start() and the next stop().
+        self.running = False
+        # path to conf is /pgdatadirs/pg1/postgresql.conf
+
+    def create_start(self):
+        """ create the pg data directory, and start the server """
+        self.zenith_cli.run(['pg', 'create'])
+        # FIXME: where did the name pg1 come from?
+        self.zenith_cli.run(['pg', 'start', 'pg1'])
+        self.running = True
+
+    def stop(self):
+        """ Stop the server if create_start() succeeded; otherwise do nothing. """
+        if self.running:
+            self.zenith_cli.run(['pg', 'stop', 'pg1'])
+            # Don't issue a second "pg stop" on a repeated stop().
+            self.running = False
+
+
+@zenfixture
+def postgres(zenith_cli):
+    """ Fixture: a Postgres object (not started yet) that is stopped at teardown. """
+    server = Postgres(zenith_cli)
+    yield server
+    # Teardown: everything after the yield is cleanup code.
+    print('Starting postgres cleanup')
+    server.stop()
+
+
+class PgBin:
+    """ A helper class for executing postgres binaries """
+
+    def __init__(self, log_dir, pg_distrib_dir):
+        """ log_dir is where run_capture() writes its output files;
+        pg_distrib_dir is the postgres install containing bin/ and lib/.
+        """
+        self.log_dir = log_dir
+        self.pg_install_path = pg_distrib_dir
+        self.pg_bin_path = os.path.join(self.pg_install_path, 'bin')
+        self.env = os.environ.copy()
+        self.env['LD_LIBRARY_PATH'] = os.path.join(self.pg_install_path, 'lib')
+
+    def _fixpath(self, command):
+        """ Prefix command[0] with the postgres bin directory, in place,
+        unless it already contains a '/'.
+        """
+        if '/' not in command[0]:
+            command[0] = os.path.join(self.pg_bin_path, command[0])
+
+    def _build_env(self, env_add):
+        """ Return the environment for a child process: our base environment,
+        or a copy of it updated with env_add when that is given.
+        """
+        if env_add is None:
+            return self.env
+        env = self.env.copy()
+        env.update(env_add)
+        return env
+
+    def run(self, command, env=None, cwd=None):
+        """ Run one of the postgres binaries.
+
+        The command should be in list form, e.g. ['pgbench', '-p', '55432']
+
+        All the necessary environment variables will be set; `env`, if given,
+        is a dict of additional variables.
+
+        If the first argument (the command name) doesn't include a path (no '/'
+        characters present), then the correct path is added. The caller's list
+        is left unmodified.
+
+        Raises subprocess.CalledProcessError if the command exits non-zero.
+
+        If you want stdout/stderr captured to files, use `run_capture` instead.
+
+        """
+        # Work on a copy, so that fixing the path doesn't modify the
+        # caller's list.
+        command = list(command)
+        self._fixpath(command)
+        print('Running command "{}"'.format(' '.join(command)))
+        env = self._build_env(env)
+        subprocess.run(command, env=env, cwd=cwd, check=True)
+
+    def run_capture(self, command, env=None, cwd=None):
+        """ Run one of the postgres binaries, with stderr and stdout redirected to a file.
+
+        This is just like `run`, but for chatty programs. The output files
+        are created in `log_dir`.
+        """
+        # Work on a copy, so that fixing the path doesn't modify the
+        # caller's list.
+        command = list(command)
+        self._fixpath(command)
+        print('Running command "{}"'.format(' '.join(command)))
+        env = self._build_env(env)
+        subprocess_capture(self.log_dir, command, env=env, cwd=cwd, check=True)
+
+
+@zenfixture
+def pg_bin(test_output_dir, pg_distrib_dir):
+    """ Fixture: a PgBin that logs into the test's output directory. """
+    helper = PgBin(test_output_dir, pg_distrib_dir)
+    return helper
+
+
+@zenfixture
+def base_dir():
+    """ Fixture: the base directory, two levels above this file (currently the git root). """
+    here = get_self_dir()
+    root = os.path.normpath(os.path.join(here, '../..'))
+    print('base_dir is', root)
+    return root
+
+
+@zenfixture
+def top_output_dir(base_dir):
+    """ Compute the top-level directory for all tests.
+
+    This is $TEST_OUTPUT if that is set to a non-empty value, otherwise
+    DEFAULT_OUTPUT_DIR under base_dir. The directory is created if needed;
+    its parent must already exist.
+    """
+    # Treat an empty TEST_OUTPUT like an unset one, the same way ZENITH_BIN
+    # and POSTGRES_BIN are handled; creating '' could never succeed anyway.
+    output_dir = os.environ.get('TEST_OUTPUT') or os.path.join(base_dir, DEFAULT_OUTPUT_DIR)
+    mkdir_if_needed(output_dir)
+    return output_dir
+
+
+@zenfixture
+def test_output_dir(request, top_output_dir):
+    """ Fixture: a freshly emptied working directory for an individual test.
+
+    The directory is named after the requesting test, or "shared" when
+    TEST_SHARED_FIXTURES is set. Anything already there is deleted first.
+    """
+    if os.environ.get('TEST_SHARED_FIXTURES') is not None:
+        # We're running shared fixtures. Share a single directory.
+        dir_name = 'shared'
+    else:
+        # one directory per test
+        dir_name = request.node.name
+
+    out_dir = os.path.join(top_output_dir, dir_name)
+    print('test_output_dir is', out_dir)
+    shutil.rmtree(out_dir, ignore_errors=True)
+    mkdir_if_needed(out_dir)
+    return out_dir
+
+
+@zenfixture
+def repo_dir(request, test_output_dir):
+    """ Fixture: the path of the test's repo_dir, "repo" under test_output_dir.
+
+    "repo_dir" is where all of the pageserver files will go; it has nothing
+    to do with the git repo. The path is only computed here, not created.
+    """
+    return os.path.join(test_output_dir, 'repo')
+
+
+@zenfixture
+def zenith_binpath(base_dir):
+    """ Fixture: locate the directory holding the zenith binaries.
+
+    Uses $ZENITH_BIN when it is set and non-empty, otherwise target/debug
+    under base_dir. The chosen directory must contain "pageserver".
+    """
+    zenith_dir = os.environ.get('ZENITH_BIN') or os.path.join(base_dir, 'target/debug')
+    pageserver_bin = os.path.join(zenith_dir, 'pageserver')
+    if os.path.exists(pageserver_bin):
+        return zenith_dir
+    raise Exception('zenith binaries not found at "{}"'.format(zenith_dir))
+
+
+@zenfixture
+def pg_distrib_dir(base_dir):
+    """ Fixture: locate the postgres install.
+
+    Uses $POSTGRES_BIN when it is set and non-empty, otherwise
+    DEFAULT_POSTGRES_DIR under base_dir. The chosen directory must contain
+    bin/postgres.
+    """
+    pg_dir = os.environ.get('POSTGRES_BIN')
+    if not pg_dir:
+        pg_dir = os.path.normpath(os.path.join(base_dir, DEFAULT_POSTGRES_DIR))
+    print('postgres dir is', pg_dir)
+    postgres_exe = os.path.join(pg_dir, 'bin/postgres')
+    if os.path.exists(postgres_exe):
+        return pg_dir
+    raise Exception('postgres not found at "{}"'.format(pg_dir))
+
+
+class SimpleTest:
+    """ A bundle of the fixture objects that a simple test needs.
+
+    Members:
+        zenith_cli
+        pageserver
+        postgres
+        pg_bin
+
+    Example of a test using it through the `zen_simple` fixture:
+
+        def test_something(zen_simple):
+            zen_simple.pg_bin.run(['pgbench', '-i'])
+
+    """
+
+    def __init__(self, zenith_cli, pageserver, postgres, pg_bin):
+        self.pg_bin = pg_bin
+        self.postgres = postgres
+        self.pageserver = pageserver
+        self.zenith_cli = zenith_cli
+
+    def start(self):
+        """ Run "zenith init", then bring up the pageserver and postgres. """
+        self.zenith_cli.run_init()
+
+        self.pageserver.start()
+        print('pageserver is running')
+
+        self.postgres.create_start()
+        print('postgres is running')
+
+
+@zenfixture
+def zen_simple(zenith_cli, pageserver, postgres, pg_bin):
+    """ Fixture: a SimpleTest whose start() has already been called. """
+    bundle = SimpleTest(zenith_cli, pageserver, postgres, pg_bin)
+    bundle.start()
+    return bundle
diff --git a/test_runner/test_broken.py b/test_runner/test_broken.py
new file mode 100644
index 0000000000..fda9fe0b23
--- /dev/null
+++ b/test_runner/test_broken.py
@@ -0,0 +1,34 @@
+import pytest
+import os
+
+pytest_plugins = ("fixtures.zenith_fixtures")
+
+"""
+
+Use this test to see what happens when tests fail.
+
+We should be able to clean up after ourselves, including stopping any
+postgres or pageserver processes.
+
+Set the environment variable RUN_BROKEN to see this test run (and fail,
+and hopefully not leave any server processes behind).
+
+"""
+
+
+# Skip marker: the decorated test only runs when RUN_BROKEN is set in the
+# environment (to any value).
+run_broken = pytest.mark.skipif(
+    os.environ.get('RUN_BROKEN') is None,
+    reason="only used for testing the fixtures"
+)
+
+@run_broken
+def test_broken(zenith_cli, pageserver, postgres, pg_bin):
+    """ Start the servers, then fail on purpose to exercise fixture cleanup. """
+    zenith_cli.run_init()
+
+    pageserver.start()
+    print('pageserver is running')
+
+    postgres.create_start()
+    print('postgres is running')
+
+    print('THIS NEXT COMMAND WILL FAIL:')
+    bad_command = ['pgbench', '-i_am_a_broken_test']
+    pg_bin.run(bad_command)
diff --git a/test_runner/test_pg_regress.py b/test_runner/test_pg_regress.py
new file mode 100644
index 0000000000..ce0a7d4d0a
--- /dev/null
+++ b/test_runner/test_pg_regress.py
@@ -0,0 +1,61 @@
+import pytest
+from fixtures.utils import mkdir_if_needed
+import getpass
+import os
+import psycopg2
+
+pytest_plugins = ("fixtures.zenith_fixtures")
+
+# FIXME: put host + port in a fixture
+HOST = 'localhost'
+PORT = 55432
+
+
+def test_pg_regress(zen_simple, test_output_dir, pg_distrib_dir, base_dir):
+    """ Run the postgres regression suite (pg_regress) against the running server. """
+
+    # Connect to postgres and create a database called "regression".
+    username = getpass.getuser()
+    conn_str = 'host={} port={} dbname=postgres user={}'.format(
+        HOST, PORT, username)
+    print('conn_str is', conn_str)
+    pg_conn = psycopg2.connect(conn_str)
+    try:
+        # Autocommit, so that CREATE DATABASE is not wrapped in a transaction.
+        pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
+        cur = pg_conn.cursor()
+        cur.execute('CREATE DATABASE regression')
+    finally:
+        # Close the connection even if CREATE DATABASE fails.
+        pg_conn.close()
+
+    # Create some local directories for pg_regress to run in.
+    runpath = os.path.join(test_output_dir, 'regress')
+    mkdir_if_needed(runpath)
+    mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
+
+    # Compute all the file locations that pg_regress will need.
+    build_path = os.path.join(
+        pg_distrib_dir, 'build/src/test/regress')
+    src_path = os.path.join(
+        base_dir, 'vendor/postgres/src/test/regress')
+    bindir = os.path.join(pg_distrib_dir, 'bin')
+    schedule = os.path.join(src_path, 'parallel_schedule')
+    pg_regress = os.path.join(build_path, 'pg_regress')
+
+    # Note: --bindir is given exactly once. Because no shell is involved, an
+    # extra '--bindir=""' would pass the two quote characters literally.
+    pg_regress_command = [
+        pg_regress,
+        '--use-existing',
+        '--bindir={}'.format(bindir),
+        '--dlpath={}'.format(build_path),
+        '--schedule={}'.format(schedule),
+        '--inputdir={}'.format(src_path),
+    ]
+
+    # Connection settings for pg_regress, added on top of the PgBin environment.
+    env = {
+        'PGPORT': str(PORT),
+        'PGUSER': username,
+        'PGHOST': HOST,
+    }
+
+    # Run the command.
+    # We don't capture the output. It's not too chatty, and it always
+    # logs the exact same data to `regression.out` anyway.
+
+    zen_simple.pg_bin.run(pg_regress_command, env=env, cwd=runpath)
diff --git a/test_runner/test_pgbench.py b/test_runner/test_pgbench.py
new file mode 100644
index 0000000000..d04e7077e3
--- /dev/null
+++ b/test_runner/test_pgbench.py
@@ -0,0 +1,10 @@
+import pytest
+
+pytest_plugins = ("fixtures.zenith_fixtures")
+
+
+def test_pgbench(zen_simple):
+    """ Initialize the pgbench tables, then run pgbench for 100 seconds with 10 clients. """
+    pg_bin = zen_simple.pg_bin
+    connect_args = ['-h', 'localhost', '-p', '55432']
+
+    init_cmd = ['pgbench'] + connect_args + ['-i', 'postgres']
+    pg_bin.run_capture(init_cmd)
+
+    bench_cmd = ['pgbench'] + connect_args + [
+        '-c', '10', '-T', '100', '-P', '1', '-M', 'prepared', 'postgres']
+    pg_bin.run_capture(bench_cmd)