From fdc15de8b25a55e88b436fcd9be082540d3a9ff4 Mon Sep 17 00:00:00 2001
From: bojanserafimov
Date: Fri, 18 Feb 2022 15:46:29 -0500
Subject: [PATCH] Add perf test: test_random_writes (#1292)

---
 test_runner/performance/test_random_writes.py | 79 +++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 test_runner/performance/test_random_writes.py

diff --git a/test_runner/performance/test_random_writes.py b/test_runner/performance/test_random_writes.py
new file mode 100644
index 0000000000..b41f2f72a8
--- /dev/null
+++ b/test_runner/performance/test_random_writes.py
@@ -0,0 +1,79 @@
+import os
+from contextlib import closing
+from fixtures.benchmark_fixture import MetricReport
+from fixtures.zenith_fixtures import ZenithEnv
+from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
+from fixtures.log_helper import log
+
+import psycopg2.extras
+import random
+import time
+from fixtures.utils import print_gc_result
+
+
+# This is a clear-box test that demonstrates the worst-case scenario for the
+# "1 segment per layer" implementation of the pageserver. It writes to random
+# rows, almost never writing to the same segment twice before flushing.
+# A naive pageserver implementation would create a full image layer for each
+# dirty segment, leading to write_amplification = segment_size / page_size
+# compared to vanilla postgres. With 10 MB segments and 8 KB pages, that's about 1280.
+def test_random_writes(zenith_with_baseline: PgCompare):
+    env = zenith_with_baseline
+
+    # Number of rows in the test database. 1M rows runs quickly, but implies
+    # a small effective_checkpoint_distance, which makes the test less realistic.
+    # A 300 TB database would imply a 250 MB effective_checkpoint_distance,
+    # but it would take a very long time to run. From what I've seen so far,
+    # increasing n_rows doesn't affect the (zenith_runtime / vanilla_runtime)
+    # performance ratio.
+    n_rows = 1 * 1000 * 1000  # around 36 MB table
+
+    # Number of writes per 3 segments. A value of 1 should produce a random
+    # workload where we almost never write to the same segment twice. Larger
+    # values of load_factor produce a larger effective_checkpoint_distance,
+    # making the test more realistic, but less effective. If you want a realistic
+    # worst-case scenario and have time to wait, increase n_rows instead.
+    load_factor = 1
+
+    # Not sure why, but this matters in a weird way (up to a 2x difference in perf).
+    # TODO: look into it.
+    n_iterations = 1
+
+    with closing(env.pg.connect()) as conn:
+        with conn.cursor() as cur:
+            # Create the test table
+            with env.record_duration('init'):
+                cur.execute("""
+                    CREATE TABLE Big(
+                        pk integer primary key,
+                        count integer default 0
+                    );
+                """)
+                cur.execute(f"INSERT INTO Big (pk) values (generate_series(1,{n_rows}))")
+
+            # Get the table size (it can't be predicted exactly because of padding and alignment)
+            cur.execute("SELECT pg_relation_size('Big');")
+            row = cur.fetchone()
+            table_size = row[0]
+            env.zenbenchmark.record("table_size", table_size, 'bytes', MetricReport.TEST_PARAM)
+
+            # Decide how much to write, based on knowledge of the pageserver implementation.
+            # Avoiding segment collisions maximizes (zenith_runtime / vanilla_runtime).
+            segment_size = 10 * 1024 * 1024
+            n_segments = table_size // segment_size
+            n_writes = load_factor * n_segments // 3
+
+            # The closer this is to 250 MB, the more realistic the test is.
+            effective_checkpoint_distance = table_size * n_writes // n_rows
+            env.zenbenchmark.record("effective_checkpoint_distance",
+                                    effective_checkpoint_distance,
+                                    'bytes',
+                                    MetricReport.TEST_PARAM)
+
+            # Update random keys
+            with env.record_duration('run'):
+                for it in range(n_iterations):
+                    for i in range(n_writes):
+                        key = random.randint(1, n_rows)
+                        cur.execute(f"update Big set count=count+1 where pk={key}")
+                    env.flush()
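
For reference, the sizing arithmetic that the comments in this test rely on can be worked through in isolation. The short sketch below is only an illustration of the test's formulas: it assumes Postgres's default 8 KB page size and plugs in the roughly 36 MB table size mentioned in the comments, whereas the test itself measures table_size at runtime with pg_relation_size('Big').

    # Illustration only: default parameters of test_random_writes, with an
    # assumed 8 KB Postgres page size and the ~36 MB table size from the comments
    # (the test measures the real table_size via pg_relation_size('Big')).
    n_rows = 1 * 1000 * 1000
    load_factor = 1
    table_size = 36 * 1024 * 1024      # approximate; measured at runtime in the test
    segment_size = 10 * 1024 * 1024    # pageserver segment size assumed by the test
    page_size = 8 * 1024               # Postgres default page size

    # Worst case for a naive "one full image layer per dirty segment" pageserver,
    # relative to vanilla postgres dirtying a single page:
    write_amplification = segment_size // page_size                   # 1280

    # Number of random single-row updates issued per iteration:
    n_segments = table_size // segment_size                           # 3
    n_writes = load_factor * n_segments // 3                          # 1

    # The metric the test records: roughly the bytes of table data updated
    # between flushes (table_size / n_rows is the average bytes per row).
    effective_checkpoint_distance = table_size * n_writes // n_rows   # ~37 bytes

    print(write_amplification, n_writes, effective_checkpoint_distance)

With the defaults this comes out to only a few tens of bytes of effective_checkpoint_distance, which is why the comments describe the 1M-row configuration as fast but less realistic than a multi-terabyte table.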