From 1b9a27d6e30f086f3ce8a41a617ae551ea0a4b0a Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 12 Aug 2024 13:33:09 +0100 Subject: [PATCH] tests: reinstate test_bulk_insert (#8683) ## Problem This test was disabled. ## Summary of changes - Remove the skip marker. - Explicitly avoid doing compaction & gc during checkpoints (the default scale doesn't do anything here, but when experimenting with larger scales it messes things up) - Set a data size that gives a ~20s runtime on a Hetzner dev machine, previous one gave very noisy results because it was so small For reference on a Hetzner AX102: ``` ------------------------------ Benchmark results ------------------------------- test_bulk_insert[neon-release-pg16].insert: 25.664 s test_bulk_insert[neon-release-pg16].pageserver_writes: 5,428 MB test_bulk_insert[neon-release-pg16].peak_mem: 577 MB test_bulk_insert[neon-release-pg16].size: 0 MB test_bulk_insert[neon-release-pg16].data_uploaded: 1,922 MB test_bulk_insert[neon-release-pg16].num_files_uploaded: 8 test_bulk_insert[neon-release-pg16].wal_written: 1,382 MB test_bulk_insert[neon-release-pg16].wal_recovery: 25.373 s test_bulk_insert[neon-release-pg16].compaction: 0.035 s ``` --- test_runner/fixtures/compare_fixtures.py | 25 ++++++++++++++++----- test_runner/performance/test_bulk_insert.py | 14 +++++++----- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/test_runner/fixtures/compare_fixtures.py b/test_runner/fixtures/compare_fixtures.py index 08215438e1..5fe544b3bd 100644 --- a/test_runner/fixtures/compare_fixtures.py +++ b/test_runner/fixtures/compare_fixtures.py @@ -42,7 +42,11 @@ class PgCompare(ABC): pass @abstractmethod - def flush(self): + def flush(self, compact: bool = False, gc: bool = False): + pass + + @abstractmethod + def compact(self): pass @abstractmethod @@ -129,13 +133,16 @@ class NeonCompare(PgCompare): def pg_bin(self) -> PgBin: return self._pg_bin - def flush(self): + def flush(self, compact: bool = True, gc: bool = True): 
wait_for_last_flush_lsn(self.env, self._pg, self.tenant, self.timeline) - self.pageserver_http_client.timeline_checkpoint(self.tenant, self.timeline) - self.pageserver_http_client.timeline_gc(self.tenant, self.timeline, 0) + self.pageserver_http_client.timeline_checkpoint(self.tenant, self.timeline, compact=compact) + if gc: + self.pageserver_http_client.timeline_gc(self.tenant, self.timeline, 0) def compact(self): - self.pageserver_http_client.timeline_compact(self.tenant, self.timeline) + self.pageserver_http_client.timeline_compact( + self.tenant, self.timeline, wait_until_uploaded=True + ) def report_peak_memory_use(self): self.zenbenchmark.record( @@ -215,9 +222,12 @@ class VanillaCompare(PgCompare): def pg_bin(self) -> PgBin: return self._pg.pg_bin - def flush(self): + def flush(self, compact: bool = False, gc: bool = False): self.cur.execute("checkpoint") + def compact(self): + pass + def report_peak_memory_use(self): pass # TODO find something @@ -266,6 +276,9 @@ class RemoteCompare(PgCompare): # TODO: flush the remote pageserver pass + def compact(self): + pass + def report_peak_memory_use(self): # TODO: get memory usage from remote pageserver pass diff --git a/test_runner/performance/test_bulk_insert.py b/test_runner/performance/test_bulk_insert.py index 3dad348976..69df7974b9 100644 --- a/test_runner/performance/test_bulk_insert.py +++ b/test_runner/performance/test_bulk_insert.py @@ -1,9 +1,9 @@ from contextlib import closing -import pytest from fixtures.benchmark_fixture import MetricReport from fixtures.common_types import Lsn from fixtures.compare_fixtures import NeonCompare, PgCompare +from fixtures.log_helper import log from fixtures.pg_version import PgVersion @@ -17,7 +17,6 @@ from fixtures.pg_version import PgVersion # 3. Disk space used # 4. 
Peak memory usage # -@pytest.mark.skip("See https://github.com/neondatabase/neon/issues/7124") def test_bulk_insert(neon_with_baseline: PgCompare): env = neon_with_baseline @@ -30,8 +29,8 @@ def test_bulk_insert(neon_with_baseline: PgCompare): # Run INSERT, recording the time and I/O it takes with env.record_pageserver_writes("pageserver_writes"): with env.record_duration("insert"): - cur.execute("insert into huge values (generate_series(1, 5000000), 0);") - env.flush() + cur.execute("insert into huge values (generate_series(1, 20000000), 0);") + env.flush(compact=False, gc=False) env.report_peak_memory_use() env.report_size() @@ -49,6 +48,9 @@ def test_bulk_insert(neon_with_baseline: PgCompare): if isinstance(env, NeonCompare): measure_recovery_time(env) + with env.record_duration("compaction"): + env.compact() + def measure_recovery_time(env: NeonCompare): client = env.env.pageserver.http_client() @@ -71,7 +73,9 @@ def measure_recovery_time(env: NeonCompare): # Measure recovery time with env.record_duration("wal_recovery"): + log.info("Entering recovery...") client.timeline_create(pg_version, env.tenant, env.timeline) # Flush, which will also wait for lsn to catch up - env.flush() + env.flush(compact=False, gc=False) + log.info("Finished recovery.")