Refactor pytest fixtures

Instead of having a lot of separate fixtures for setting up the
pageserver, the compute nodes, the safekeepers etc., have one big
ZenithEnv object that encapsulates the whole environment. Every test
either uses the shared "zenith_simple_env" fixture, which provides the
default setup of a pageserver with no authentication and no
safekeepers, or sets up a custom test-specific environment. Tests that
want to use safekeepers or authentication build one with the
ZenithEnvBuilder fixture.
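
For illustration, a minimal sketch of the two patterns. The test names
and bodies are made up; the fixture names, attributes, and calls are
the ones used in the diffs below:

    from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder

    # Default setup: pageserver with no authentication, no safekeepers.
    def test_with_default_setup(zenith_simple_env: ZenithEnv):
        env = zenith_simple_env
        env.zenith_cli(["branch", "test_with_default_setup", "empty"])
        pg = env.postgres.create_start('test_with_default_setup')

    # Custom setup: configure the builder, then init() yields a ZenithEnv.
    def test_with_safekeepers(zenith_env_builder: ZenithEnvBuilder):
        zenith_env_builder.num_safekeepers = 3
        env = zenith_env_builder.init()
        env.zenith_cli(["branch", "test_with_safekeepers", "main"])
        pg = env.postgres.create_start('test_with_safekeepers')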

Gathering information about the whole environment into one object makes
some things simpler. For example, when a new compute node is created,
you no longer need to pass the 'wal_acceptors' connection string as an
argument to the 'postgres.create_start' function; 'create_start'
fetches that information directly from the ZenithEnv object.
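
A rough sketch of how that lookup might be wired inside the fixtures
module. This is illustrative only: the 'safekeepers' list and the
'get_safekeeper_connstrs' helper are assumptions, not code from this
commit:

    from typing import List, Optional

    class PostgresFactory:
        def __init__(self, env: 'ZenithEnv'):
            # Keep a back-reference to the whole environment, so callers
            # no longer have to pass pieces of it around explicitly.
            self.env = env

        def create_start(self, node_name: str,
                         branch: Optional[str] = None,
                         tenant: Optional[str] = None):
            # Fetch the safekeeper connection string from the environment
            # instead of taking it as a parameter (assumed helper name).
            wal_acceptors = self.env.get_safekeeper_connstrs()
            ...  # start the compute node with these settings

    class ZenithEnv:
        def __init__(self):
            self.safekeepers: List[str] = []  # assumed: connstrs, empty by default
            self.postgres = PostgresFactory(self)

        def get_safekeeper_connstrs(self) -> Optional[str]:
            # None when the environment was built without safekeepers.
            return ','.join(self.safekeepers) if self.safekeepers else None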
Heikki Linnakangas
2021-10-25 14:14:47 +03:00
parent 28af3e5008
commit 66ec135676
33 changed files with 723 additions and 808 deletions

View File

@@ -1,6 +1,6 @@
 import os
 from contextlib import closing
-from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.zenith_fixtures import ZenithEnv
 from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
@@ -16,20 +16,17 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
 # 3. Disk space used
 # 4. Peak memory usage
 #
-def test_bulk_insert(postgres: PostgresFactory,
-                     pageserver: ZenithPageserver,
-                     zenith_cli,
-                     zenbenchmark,
-                     repo_dir: str):
+def test_bulk_insert(zenith_simple_env: ZenithEnv, zenbenchmark):
+    env = zenith_simple_env
 
     # Create a branch for us
-    zenith_cli.run(["branch", "test_bulk_insert", "empty"])
+    env.zenith_cli(["branch", "test_bulk_insert", "empty"])
 
-    pg = postgres.create_start('test_bulk_insert')
+    pg = env.postgres.create_start('test_bulk_insert')
     log.info("postgres is running on 'test_bulk_insert' branch")
 
     # Open a connection directly to the page server that we'll use to force
     # flushing the layers to disk
-    psconn = pageserver.connect()
+    psconn = env.pageserver.connect()
     pscur = psconn.cursor()
 
     # Get the timeline ID of our branch. We need it for the 'do_gc' command
@@ -41,19 +38,19 @@ def test_bulk_insert(postgres: PostgresFactory,
             cur.execute("create table huge (i int, j int);")
 
             # Run INSERT, recording the time and I/O it takes
-            with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
+            with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
                 with zenbenchmark.record_duration('insert'):
                     cur.execute("insert into huge values (generate_series(1, 5000000), 0);")
 
     # Flush the layers from memory to disk. This is included in the reported
     # time and I/O
-    pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
+    pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
 
     # Record peak memory usage
-    zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(pageserver) / 1024, 'MB')
+    zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(env.pageserver) / 1024, 'MB')
 
     # Report disk space used by the repository
-    timeline_size = zenbenchmark.get_timeline_size(repo_dir,
-                                                   pageserver.initial_tenant,
+    timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
+                                                   env.initial_tenant,
                                                    timeline)
     zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')

View File

@@ -1,11 +1,7 @@
 import timeit
 
 import pytest
 
-from fixtures.zenith_fixtures import (
-    TenantFactory,
-    ZenithCli,
-    PostgresFactory,
-)
+from fixtures.zenith_fixtures import ZenithEnvBuilder
 
 pytest_plugins = ("fixtures.benchmark_fixture")
@@ -20,37 +16,37 @@ pytest_plugins = ("fixtures.benchmark_fixture")
 @pytest.mark.parametrize('tenants_count', [1, 5, 10])
 @pytest.mark.parametrize('use_wal_acceptors', ['with_wa', 'without_wa'])
 def test_bulk_tenant_create(
-    zenith_cli: ZenithCli,
-    tenant_factory: TenantFactory,
-    postgres: PostgresFactory,
-    wa_factory,
+    zenith_env_builder: ZenithEnvBuilder,
     use_wal_acceptors: str,
     tenants_count: int,
     zenbenchmark,
 ):
     """Measure tenant creation time (with and without wal acceptors)"""
+    if use_wal_acceptors == 'with_wa':
+        zenith_env_builder.num_safekeepers = 3
+    env = zenith_env_builder.init()
+
     time_slices = []
 
     for i in range(tenants_count):
         start = timeit.default_timer()
 
-        tenant = tenant_factory.create()
-        zenith_cli.run([
+        tenant = env.create_tenant()
+        env.zenith_cli([
             "branch",
             f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}",
             "main",
             f"--tenantid={tenant}"
         ])
 
-        if use_wal_acceptors == 'with_wa':
-            wa_factory.start_n_new(3)
+        # FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now?
+        #if use_wal_acceptors == 'with_wa':
+        #    wa_factory.start_n_new(3)
 
-        pg_tenant = postgres.create_start(
+        pg_tenant = env.postgres.create_start(
             f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}",
             None,  # branch name, None means same as node name
             tenant,
-            wal_acceptors=wa_factory.get_connstrs() if use_wal_acceptors == 'with_wa' else None,
         )
 
         end = timeit.default_timer()

View File

@@ -1,6 +1,6 @@
 import os
 from contextlib import closing
-from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.zenith_fixtures import ZenithEnv
 from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
@@ -11,20 +11,17 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
 # As of this writing, we're duplicating those giant WAL records for each page,
 # which makes the delta layer about 32x larger than it needs to be.
 #
-def test_gist_buffering_build(postgres: PostgresFactory,
-                              pageserver: ZenithPageserver,
-                              zenith_cli,
-                              zenbenchmark,
-                              repo_dir: str):
+def test_gist_buffering_build(zenith_simple_env: ZenithEnv, zenbenchmark):
+    env = zenith_simple_env
 
     # Create a branch for us
-    zenith_cli.run(["branch", "test_gist_buffering_build", "empty"])
+    env.zenith_cli(["branch", "test_gist_buffering_build", "empty"])
 
-    pg = postgres.create_start('test_gist_buffering_build')
+    pg = env.postgres.create_start('test_gist_buffering_build')
     log.info("postgres is running on 'test_gist_buffering_build' branch")
 
     # Open a connection directly to the page server that we'll use to force
     # flushing the layers to disk
-    psconn = pageserver.connect()
+    psconn = env.pageserver.connect()
     pscur = psconn.cursor()
 
     # Get the timeline ID of our branch. We need it for the 'do_gc' command
@@ -40,7 +37,7 @@ def test_gist_buffering_build(postgres: PostgresFactory,
             )
 
             # Build the index.
-            with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
+            with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
                 with zenbenchmark.record_duration('build'):
                     cur.execute(
                         "create index gist_pointidx2 on gist_point_tbl using gist(p) with (buffering = on)"
@@ -48,13 +45,13 @@ def test_gist_buffering_build(postgres: PostgresFactory,
     # Flush the layers from memory to disk. This is included in the reported
     # time and I/O
-    pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 1000000")
+    pscur.execute(f"do_gc {env.initial_tenant} {timeline} 1000000")
 
     # Record peak memory usage
-    zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(pageserver) / 1024, 'MB')
+    zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(env.pageserver) / 1024, 'MB')
 
     # Report disk space used by the repository
-    timeline_size = zenbenchmark.get_timeline_size(repo_dir,
-                                                   pageserver.initial_tenant,
+    timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
+                                                   env.initial_tenant,
                                                    timeline)
     zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')

View File

@@ -1,6 +1,6 @@
 import os
 from contextlib import closing
-from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.zenith_fixtures import ZenithEnv
 from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
@@ -15,21 +15,17 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
 # 2. Time to run 5000 pgbench transactions
 # 3. Disk space used
 #
-def test_pgbench(postgres: PostgresFactory,
-                 pageserver: ZenithPageserver,
-                 pg_bin,
-                 zenith_cli,
-                 zenbenchmark,
-                 repo_dir: str):
+def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin, zenbenchmark):
+    env = zenith_simple_env
 
     # Create a branch for us
-    zenith_cli.run(["branch", "test_pgbench_perf", "empty"])
+    env.zenith_cli(["branch", "test_pgbench_perf", "empty"])
 
-    pg = postgres.create_start('test_pgbench_perf')
+    pg = env.postgres.create_start('test_pgbench_perf')
     log.info("postgres is running on 'test_pgbench_perf' branch")
 
     # Open a connection directly to the page server that we'll use to force
     # flushing the layers to disk
-    psconn = pageserver.connect()
+    psconn = env.pageserver.connect()
     pscur = psconn.cursor()
 
     # Get the timeline ID of our branch. We need it for the 'do_gc' command
@@ -41,13 +37,13 @@ def test_pgbench(postgres: PostgresFactory,
     connstr = pg.connstr()
 
     # Initialize pgbench database, recording the time and I/O it takes
-    with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
+    with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
         with zenbenchmark.record_duration('init'):
             pg_bin.run_capture(['pgbench', '-s5', '-i', connstr])
 
     # Flush the layers from memory to disk. This is included in the reported
     # time and I/O
-    pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
+    pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
 
     # Run pgbench for 5000 transactions
     with zenbenchmark.record_duration('5000_xacts'):
@@ -55,8 +51,8 @@ def test_pgbench(postgres: PostgresFactory,
     # Flush the layers to disk again. This is *not* included in the reported time,
     # though.
-    pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
+    pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
 
     # Report disk space used by the repository
-    timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline)
+    timeline_size = zenbenchmark.get_timeline_size(env.repo_dir, env.initial_tenant, timeline)
     zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')

View File

@@ -12,26 +12,23 @@
 # Amplification problem at its finest.
 
 import os
 from contextlib import closing
-from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
+from fixtures.zenith_fixtures import ZenithEnv
 from fixtures.log_helper import log
 
 pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
 
-def test_write_amplification(postgres: PostgresFactory,
-                             pageserver: ZenithPageserver,
-                             zenith_cli,
-                             zenbenchmark,
-                             repo_dir: str):
+def test_write_amplification(zenith_simple_env: ZenithEnv, zenbenchmark):
+    env = zenith_simple_env
 
     # Create a branch for us
-    zenith_cli.run(["branch", "test_write_amplification", "empty"])
+    env.zenith_cli(["branch", "test_write_amplification", "empty"])
 
-    pg = postgres.create_start('test_write_amplification')
+    pg = env.postgres.create_start('test_write_amplification')
     log.info("postgres is running on 'test_write_amplification' branch")
 
     # Open a connection directly to the page server that we'll use to force
     # flushing the layers to disk
-    psconn = pageserver.connect()
+    psconn = env.pageserver.connect()
     pscur = psconn.cursor()
 
     with closing(pg.connect()) as conn:
@@ -40,7 +37,7 @@ def test_write_amplification(postgres: PostgresFactory,
             cur.execute("SHOW zenith.zenith_timeline")
             timeline = cur.fetchone()[0]
 
-            with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
+            with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
                 with zenbenchmark.record_duration('run'):
 
                     # NOTE: Because each iteration updates every table already created,
@@ -73,10 +70,10 @@ def test_write_amplification(postgres: PostgresFactory,
                     # slower, adding some delays in this loop. But forcing
                     # the checkpointing and GC makes the test go faster,
                    # with the same total I/O effect.
-                    pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
+                    pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
 
     # Report disk space used by the repository
-    timeline_size = zenbenchmark.get_timeline_size(repo_dir,
-                                                   pageserver.initial_tenant,
+    timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
+                                                   env.initial_tenant,
                                                    timeline)
     zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')