From ab2f0ad1a840cac4381ecd3235c2ac50d58edd3c Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 17 May 2021 20:37:28 +0300 Subject: [PATCH] Fix and reorganize python tests. - The 'pageserver' fixture now sets up the repository and starts up the Page Server automatically. In other words, the 'pageserver' fixture provides a Page Server that's up and running and ready to use in tests. - The 'pageserver' fixture now also creates a branch called 'empty', right after initializing the repository. By convention, all the tests start by creating a new branch off 'empty' for the test. This allows running all the tests against the same Page Server concurrently. (I haven't tested that though. pytest doesn't provide an option to run tests in parallel but there are extensions for that.) - Remove the 'zen_simple' fixture. Now that 'pageserver' provides a server that's up and running, it's pretty simple to use the 'pageserver' and 'postgres' fixtures directly. - Don't assume host name or ports in the tests. They now use the fields in the fixtures for that. That allows assigning the ports dynamically, making it possible to run multiple page servers in parallel, or running the tests in parallel with another page server. This commit still hard codes the Page Server's port in the fixture, though, so more work is needed to actually make it possible. - I made some changes to the 'postgres' fixture in commit 532918e13d, which broke the other tests. Fix them. - Divide the tests into two "batches" of roughly equal runtime, which can be run in parallel - Merge the 'test_file' and 'test_filter' options in CircleCI config into one 'test_selection' option, for simplicity. 
--- .circleci/config.yml | 41 +++++---- test_runner/README.md | 12 ++- .../{ => batch_others}/test_branch_behind.py | 23 +++-- test_runner/{ => batch_others}/test_config.py | 19 ++--- .../{ => batch_others}/test_pageserver_api.py | 35 ++++---- test_runner/batch_others/test_pgbench.py | 17 ++++ .../{ => batch_pg_regress}/test_pg_regress.py | 22 ++--- test_runner/conftest.py | 1 + test_runner/fixtures/zenith_fixtures.py | 85 +++++++------------ test_runner/test_broken.py | 7 +- test_runner/test_pgbench.py | 10 --- 11 files changed, 131 insertions(+), 141 deletions(-) rename test_runner/{ => batch_others}/test_branch_behind.py (76%) rename test_runner/{ => batch_others}/test_config.py (59%) rename test_runner/{ => batch_others}/test_pageserver_api.py (52%) create mode 100644 test_runner/batch_others/test_pgbench.py rename test_runner/{ => batch_pg_regress}/test_pg_regress.py (75%) create mode 100644 test_runner/conftest.py delete mode 100644 test_runner/test_pgbench.py diff --git a/.circleci/config.yml b/.circleci/config.yml index cbfc47ff61..ec4aea2c31 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -112,14 +112,16 @@ jobs: #description: "Run pytest" executor: python/default parameters: - # Use test_filter to name a test or a group of tests with the same prefix. - test_filter: + # pytest args to specify the tests to run. + # + # This can be a test file name, e.g. 'test_pgbench.py, or a subdirectory, + # or '-k foobar' to run tests containing string 'foobar'. See pytest man page + # section SPECIFYING TESTS / SELECTING TESTS for details. + # + # This parameter is required, to prevent the mistake of running all tests in one job. + test_selection: type: string default: "" - # Use test_file to name a python file containing tests to run. - # This parameter is required, to prevent the mistake of running all tests in one job. - test_file: - type: string # Arbitrary parameters to pytest. 
For example "-s" to prevent capturing stdout/stderr extra_params: type: string @@ -144,21 +146,17 @@ jobs: - POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install - TEST_OUTPUT: /tmp/test_output command: | - TEST_FILE="<< parameters.test_file >>" - TEST_FILTER="<< parameters.test_filter >>" + TEST_SELECTION="<< parameters.test_selection >>" EXTRA_PARAMS="<< parameters.extra_params >>" - if [ -z "$TEST_FILE$TEST_FILTER" ]; then - echo "test_file or test_filter must be set" + if [ -z "$TEST_SELECTION" ]; then + echo "test_selection must be set" exit 1 fi - if [ -n "$TEST_FILTER" ]; then - TEST_FILTER="-k $TEST_FILTER" - fi # Run the tests. # # The junit.xml file allows CircleCI to display more fine-grained test information # in its "Tests" tab in the results page. - pytest --junitxml=$TEST_OUTPUT/junit.xml --tb=short $TEST_FILE $TEST_FILTER $EXTRA_PARAMS + pytest --junitxml=$TEST_OUTPUT/junit.xml --tb=short $TEST_SELECTION $EXTRA_PARAMS - run: # CircleCI artifacts are preserved one file at a time, so skipping # this step isn't a good idea. If you want to extract the @@ -189,14 +187,13 @@ workflows: jobs: - build-zenith - run-pytest: - name: pgbench test - test_file: test_pgbench.py - requires: - - build-zenith - - run-pytest: - name: pg_regress test - test_file: test_pg_regress.py - extra_params: -s + name: pg_regress tests + test_selection: batch_pg_regress needs_postgres_source: true requires: - build-zenith + - run-pytest: + name: other tests + test_selection: batch_others + requires: + - build-zenith diff --git a/test_runner/README.md b/test_runner/README.md index 98535ada58..7519f8e256 100644 --- a/test_runner/README.md +++ b/test_runner/README.md @@ -18,6 +18,15 @@ Prerequisites: - The zenith git repo, including the postgres submodule (for some tests, e.g. pg_regress) +### Test Organization + +The tests are divided into a few batches, such that each batch takes roughly +the same amount of time. The batches can be run in parallel, to minimize total +runtime. 
Currently, there are only two batches: + +- batch_pg_regress: Runs PostgreSQL regression tests +- batch_others: All other tests + ### Running the tests Because pytest will search all subdirectories for tests, it's easiest to @@ -44,8 +53,7 @@ Useful environment variables: `POSTGRES_DISTRIB_DIR`: The directory where postgres distribution can be found. `TEST_OUTPUT`: Set the directory where test state and test output files should go. -`TEST_SHARED_FIXTURES`: Try to re-use a single postgres and pageserver -for all the tests. +`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests. Let stdout and stderr go to the terminal instead of capturing them: `pytest -s ...` diff --git a/test_runner/test_branch_behind.py b/test_runner/batch_others/test_branch_behind.py similarity index 76% rename from test_runner/test_branch_behind.py rename to test_runner/batch_others/test_branch_behind.py index 30ac16c356..a4b5599aee 100644 --- a/test_runner/test_branch_behind.py +++ b/test_runner/batch_others/test_branch_behind.py @@ -8,14 +8,13 @@ pytest_plugins = ("fixtures.zenith_fixtures") # Create a couple of branches off the main branch, at a historical point in time. 
# def test_branch_behind(zenith_cli, pageserver, postgres, pg_bin): - zenith_cli.run_init() - pageserver.start() - print('pageserver is running') + # Branch at the point where only 100 rows were inserted + zenith_cli.run(["branch", "test_branch_behind", "empty"]); - pgmain = postgres.create_start() - print('postgres is running on main branch') + pgmain = postgres.create_start('test_branch_behind') + print("postgres is running on 'test_branch_behind' branch") - main_pg_conn = pgmain.connect(); + main_pg_conn = psycopg2.connect(pgmain.connstr()); main_pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) main_cur = main_pg_conn.cursor() @@ -33,7 +32,7 @@ def test_branch_behind(zenith_cli, pageserver, postgres, pg_bin): print('LSN after 100100 rows: ' + lsn_b) # Branch at the point where only 100 rows were inserted - zenith_cli.run(["branch", "hundred", "main@"+lsn_a]); + zenith_cli.run(["branch", "test_branch_behind_hundred", "test_branch_behind@"+lsn_a]); # Insert many more rows. This generates enough WAL to fill a few segments. 
main_cur.execute("INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g"); @@ -44,20 +43,20 @@ def test_branch_behind(zenith_cli, pageserver, postgres, pg_bin): print('LSN after 200100 rows: ' + lsn_c) # Branch at the point where only 200 rows were inserted - zenith_cli.run(["branch", "more", "main@"+lsn_b]); + zenith_cli.run(["branch", "test_branch_behind_more", "test_branch_behind@"+lsn_b]); - pg_hundred = postgres.create_start("hundred") - pg_more = postgres.create_start("more") + pg_hundred = postgres.create_start("test_branch_behind_hundred") + pg_more = postgres.create_start("test_branch_behind_more") # On the 'hundred' branch, we should see only 100 rows - hundred_pg_conn = pg_hundred.connect() + hundred_pg_conn = psycopg2.connect(pg_hundred.connstr()) hundred_pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) hundred_cur = hundred_pg_conn.cursor() hundred_cur.execute('SELECT count(*) FROM foo'); assert(hundred_cur.fetchone()[0] == 100); # On the 'more' branch, we should see 100200 rows - more_pg_conn = pg_more.connect() + more_pg_conn = psycopg2.connect(pg_more.connstr()) more_pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) more_cur = more_pg_conn.cursor() more_cur.execute('SELECT count(*) FROM foo'); diff --git a/test_runner/test_config.py b/test_runner/batch_others/test_config.py similarity index 59% rename from test_runner/test_config.py rename to test_runner/batch_others/test_config.py index 1f186363c8..62b69fc198 100644 --- a/test_runner/test_config.py +++ b/test_runner/batch_others/test_config.py @@ -6,21 +6,18 @@ import psycopg2 pytest_plugins = ("fixtures.zenith_fixtures") +# +# Test starting Postgres with custom options +# def test_config(zenith_cli, pageserver, postgres, pg_bin): - zenith_cli.run_init() - pageserver.start() - print('pageserver is running') + # Create a branch for us + zenith_cli.run(["branch", "test_config", "empty"]); # change config 
- postgres.create_start(['log_min_messages=debug1']) + pg = postgres.create_start('test_config', ['log_min_messages=debug1']) + print('postgres is running on test_config branch') - print('postgres is running') - - username = getpass.getuser() - conn_str = 'host={} port={} dbname=postgres user={}'.format( - postgres.host, postgres.port, username) - print('conn_str is', conn_str) - pg_conn = psycopg2.connect(conn_str) + pg_conn = psycopg2.connect(pg.connstr()) pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) cur = pg_conn.cursor() diff --git a/test_runner/test_pageserver_api.py b/test_runner/batch_others/test_pageserver_api.py similarity index 52% rename from test_runner/test_pageserver_api.py rename to test_runner/batch_others/test_pageserver_api.py index 7aae1c4737..5ea65fae51 100644 --- a/test_runner/test_pageserver_api.py +++ b/test_runner/batch_others/test_pageserver_api.py @@ -5,44 +5,45 @@ import json pytest_plugins = ("fixtures.zenith_fixtures") -HOST = 'localhost' -PAGESERVER_PORT = 64000 - -def test_status(zen_simple): - username = getpass.getuser() - conn_str = 'host={} port={} dbname=postgres user={}'.format( - HOST, PAGESERVER_PORT, username) - pg_conn = psycopg2.connect(conn_str) +def test_status(pageserver): + pg_conn = psycopg2.connect(pageserver.connstr()) pg_conn.autocommit = True cur = pg_conn.cursor() cur.execute('status;') assert cur.fetchone() == ('hello world',) pg_conn.close() -def test_pg_list(zen_simple): - username = getpass.getuser() - page_server_conn_str = 'host={} port={} dbname=postgres user={}'.format( - HOST, PAGESERVER_PORT, username) - page_server_conn = psycopg2.connect(page_server_conn_str) +def test_pg_list(pageserver, zenith_cli): + + # Create a branch for us + zenith_cli.run(["branch", "test_pg_list_main", "empty"]); + + page_server_conn = psycopg2.connect(pageserver.connstr()) page_server_conn.autocommit = True page_server_cur = page_server_conn.cursor() page_server_cur.execute('pg_list;') branches 
= json.loads(page_server_cur.fetchone()[0]) + # Filter out branches created by other tests + branches = [x for x in branches if x['name'].startswith('test_pg_list')] + assert len(branches) == 1 - assert branches[0]['name'] == 'main' + assert branches[0]['name'] == 'test_pg_list_main' assert 'timeline_id' in branches[0] assert 'latest_valid_lsn' in branches[0] - zen_simple.zenith_cli.run(['branch', 'experimental', 'main']) - zen_simple.zenith_cli.run(['pg', 'create', 'experimental']) + # Create another branch, and start Postgres on it + zenith_cli.run(['branch', 'test_pg_list_experimental', 'test_pg_list_main']) + zenith_cli.run(['pg', 'create', 'test_pg_list_experimental']) page_server_cur.execute('pg_list;') new_branches = json.loads(page_server_cur.fetchone()[0]) + # Filter out branches created by other tests + new_branches = [x for x in new_branches if x['name'].startswith('test_pg_list')] assert len(new_branches) == 2 new_branches.sort(key=lambda k: k['name']) - assert new_branches[0]['name'] == 'experimental' + assert new_branches[0]['name'] == 'test_pg_list_experimental' assert new_branches[0]['timeline_id'] != branches[0]['timeline_id'] # TODO: do the LSNs have to match here? 
diff --git a/test_runner/batch_others/test_pgbench.py b/test_runner/batch_others/test_pgbench.py new file mode 100644 index 0000000000..b668012f0c --- /dev/null +++ b/test_runner/batch_others/test_pgbench.py @@ -0,0 +1,17 @@ +import pytest + +pytest_plugins = ("fixtures.zenith_fixtures") + + +def test_pgbench(pageserver, postgres, pg_bin, zenith_cli): + + # Create a branch for us + zenith_cli.run(["branch", "test_pgbench", "empty"]); + + pg = postgres.create_start('test_pgbench') + print("postgres is running on 'test_pgbench' branch") + + connstr = pg.connstr(); + + pg_bin.run_capture(['pgbench', '-i', connstr]) + pg_bin.run_capture(['pgbench'] + '-c 10 -T 5 -P 1 -M prepared'.split() + [connstr]) diff --git a/test_runner/test_pg_regress.py b/test_runner/batch_pg_regress/test_pg_regress.py similarity index 75% rename from test_runner/test_pg_regress.py rename to test_runner/batch_pg_regress/test_pg_regress.py index ce0a7d4d0a..51144e5e21 100644 --- a/test_runner/test_pg_regress.py +++ b/test_runner/batch_pg_regress/test_pg_regress.py @@ -11,14 +11,14 @@ HOST = 'localhost' PORT = 55432 -def test_pg_regress(zen_simple, test_output_dir, pg_distrib_dir, base_dir): +def test_pg_regress(pageserver, postgres, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir, base_dir, capsys): + + # Create a branch for us + zenith_cli.run(["branch", "test_pg_regress", "empty"]); # Connect to postgres and create a database called "regression". 
- username = getpass.getuser() - conn_str = 'host={} port={} dbname=postgres user={}'.format( - HOST, PORT, username) - print('conn_str is', conn_str) - pg_conn = psycopg2.connect(conn_str) + pg = postgres.create_start('test_pg_regress') + pg_conn = psycopg2.connect(pg.connstr()) pg_conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) cur = pg_conn.cursor() cur.execute('CREATE DATABASE regression') @@ -49,13 +49,13 @@ def test_pg_regress(zen_simple, test_output_dir, pg_distrib_dir, base_dir): ] env = { - 'PGPORT': str(PORT), - 'PGUSER': username, - 'PGHOST': HOST, + 'PGPORT': str(pg.port), + 'PGUSER': pg.username, + 'PGHOST': pg.host, } # Run the command. # We don't capture the output. It's not too chatty, and it always # logs the exact same data to `regression.out` anyway. - - zen_simple.pg_bin.run(pg_regress_command, env=env, cwd=runpath) + with capsys.disabled(): + pg_bin.run(pg_regress_command, env=env, cwd=runpath) diff --git a/test_runner/conftest.py b/test_runner/conftest.py new file mode 100644 index 0000000000..998034bd21 --- /dev/null +++ b/test_runner/conftest.py @@ -0,0 +1 @@ +pytest_plugins = ("fixtures.zenith_fixtures") diff --git a/test_runner/fixtures/zenith_fixtures.py b/test_runner/fixtures/zenith_fixtures.py index 00da2e19a0..61fb0531ea 100644 --- a/test_runner/fixtures/zenith_fixtures.py +++ b/test_runner/fixtures/zenith_fixtures.py @@ -90,10 +90,6 @@ class ZenithCli: print('Running command "{}"'.format(' '.join(args))) subprocess.run(args, env=self.env, check=True) - def run_init(self): - """ Run the "zenith init " command. """ - self.run(['init']) - @zenfixture def zenith_cli(zenith_binpath, repo_dir, pg_distrib_dir): @@ -107,24 +103,50 @@ class ZenithPageserver: self.zenith_cli = zenith_cli self.running = False + # Initialize the repository, i.e. 
run "zenith init" + def init(self): + self.zenith_cli.run(['init']) + + # Start the page server def start(self): self.zenith_cli.run(['start']) self.running = True + # Stop the page server def stop(self): self.zenith_cli.run(['stop']) self.running = True + # The page server speaks the Postgres FE/BE protocol, so you can connect + # to it with any Postgres client, and run special commands. This function + # returns a libpq connection string for connecting to it. + def connstr(self): + username = getpass.getuser() + conn_str = 'host={} port={} dbname=postgres user={}'.format( + 'localhost', 64000, username) + return conn_str +# The 'pageserver' fixture provides a Page Server that's up and running. +# +# If TEST_SHARED_FIXTURES is set, the Page Server instance is shared by all +# the tests. To avoid clashing with other tests, don't use the 'main' branch in +# the tests directly. Instead, create a branch off the 'empty' branch and use +# that. +# +# By convention, the test branches are named after the tests. For example, +# test called 'test_foo' would create and use branches with the 'test_foo' prefix. @zenfixture def pageserver(zenith_cli): ps = ZenithPageserver(zenith_cli) + ps.init() + ps.start() + # For convenience in tests, create a branch from the freshly-initialized cluster. + zenith_cli.run(["branch", "empty", "main"]); yield ps # After the yield comes any cleanup code we need. print('Starting pageserver cleanup') ps.stop() - class Postgres: """ An object representing a running postgres daemon. 
""" @@ -132,12 +154,13 @@ class Postgres: self.zenith_cli = zenith_cli self.instance_num = instance_num self.running = False + self.username = getpass.getuser() self.host = 'localhost' self.port = 55431 + instance_num self.repo_dir = repo_dir # path to conf is /pgdatadirs/pg/postgresql.conf - def create_start(self, branch="main", config_lines=None): + def create_start(self, branch, config_lines=None): """ create the pg data directory, and start the server """ self.zenith_cli.run(['pg', 'create', branch]) if config_lines is None: @@ -160,13 +183,11 @@ class Postgres: if self.running: self.zenith_cli.run(['pg', 'stop', 'pg{}'.format(self.instance_num)]) - # Open a psycopg2 connection to the server, ready for running queries. - def connect(self): - username = getpass.getuser() + # Return a libpq connection string to connect to the Postgres instance + def connstr(self): conn_str = 'host={} port={} dbname=postgres user={}'.format( - self.host, self.port, username) - - return psycopg2.connect(conn_str) + self.host, self.port, self.username) + return conn_str class PostgresFactory: """ An object representing multiple running postgres daemons. """ @@ -325,43 +346,3 @@ def pg_distrib_dir(base_dir): if not os.path.exists(os.path.join(pg_dir, 'bin/postgres')): raise Exception('postgres not found at "{}"'.format(pg_dir)) return pg_dir - - -class SimpleTest: - """ A fixture object that contains the things we need for a simple test. - - This is an object with common fixture members: - zenith_cli - pageserver - postgres - pg_bin - - Example: - - @zenfixture - def my_test(zen_simple): - zen_simple.pg_bin.run(['pgbench', '-i']) - - """ - - def __init__(self, zenith_cli, pageserver, postgres, pg_bin): - self.zenith_cli = zenith_cli - self.pageserver = pageserver - self.postgres = postgres - self.pg_bin = pg_bin - - def start(self): - """ Start a pageserver and postgres. 
""" - self.zenith_cli.run_init() - self.pageserver.start() - print('pageserver is running') - - self.postgres.create_start() - print('postgres is running') - - -@zenfixture -def zen_simple(zenith_cli, pageserver, postgres, pg_bin): - simple = SimpleTest(zenith_cli, pageserver, postgres, pg_bin) - simple.start() - return simple diff --git a/test_runner/test_broken.py b/test_runner/test_broken.py index fda9fe0b23..4d125c9047 100644 --- a/test_runner/test_broken.py +++ b/test_runner/test_broken.py @@ -23,11 +23,10 @@ run_broken = pytest.mark.skipif( @run_broken def test_broken(zenith_cli, pageserver, postgres, pg_bin): - zenith_cli.run_init() - pageserver.start() - print('pageserver is running') + # Create a branch for us + zenith_cli.run(["branch", "test_broken", "empty"]); - postgres.create_start() + pg = postgres.create_start("test_broken") print('postgres is running') print('THIS NEXT COMMAND WILL FAIL:') diff --git a/test_runner/test_pgbench.py b/test_runner/test_pgbench.py deleted file mode 100644 index 33c2be8aad..0000000000 --- a/test_runner/test_pgbench.py +++ /dev/null @@ -1,10 +0,0 @@ -import pytest - -pytest_plugins = ("fixtures.zenith_fixtures") - - -def test_pgbench(zen_simple): - zen_simple.pg_bin.run_capture( - 'pgbench -h localhost -p 55432 -i postgres'.split()) - zen_simple.pg_bin.run_capture( - 'pgbench -h localhost -p 55432 -c 10 -T 5 -P 1 -M prepared postgres'.split())