From 65042cbadd0426c43499bb7675e671b5c6e980e9 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Thu, 19 Dec 2024 10:58:49 +0000
Subject: [PATCH] tests: use high IO concurrency in `test_pgdata_import_smoke`,
 use `effective_io_concurrency=2` in tests by default (#10114)

## Problem

`test_pgdata_import_smoke` writes two gigabytes of pages and then reads
them back serially. This is CPU bottlenecked and results in a long
runtime, and sensitivity to CPU load from other tests on the same
machine.

Closes: https://github.com/neondatabase/neon/issues/10071

## Summary of changes

- Use effective_io_concurrency=32 when doing sequential scans through
2GiB of pages in test_pgdata_import_smoke. This is a ~10x runtime
decrease in the parts of the test that do sequential scans.
- Also set `effective_io_concurrency=2` for tests, as I noticed while
debugging that we were doing all getpage requests serially, which is bad
for checking the stability of the batching code.
---
 control_plane/src/endpoint.rs             |  4 ++++
 test_runner/regress/test_import_pgdata.py | 14 +++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs
index 1fdf326051..5ebf842813 100644
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -316,6 +316,10 @@ impl Endpoint {
         // and can cause errors like 'no unpinned buffers available', see
         //
         conf.append("shared_buffers", "1MB");
+        // Postgres defaults to effective_io_concurrency=1, which does not exercise the pageserver's
+        // batching logic.  Set this to 2 so that we exercise the code a bit without letting
+        // individual tests do a lot of concurrent work on underpowered test machines
+        conf.append("effective_io_concurrency", "2");
         conf.append("fsync", "off");
         conf.append("max_connections", "100");
         conf.append("wal_level", "logical");
diff --git a/test_runner/regress/test_import_pgdata.py b/test_runner/regress/test_import_pgdata.py
index 29229b73c1..6ea2393a9d 100644
--- a/test_runner/regress/test_import_pgdata.py
+++ b/test_runner/regress/test_import_pgdata.py
@@ -84,6 +84,8 @@ def test_pgdata_import_smoke(
     elif rel_block_size == RelBlockSize.TWO_STRPES_PER_SHARD:
         target_relblock_size = (shard_count or 1) * stripe_size * 8192 * 2
     elif rel_block_size == RelBlockSize.MULTIPLE_RELATION_SEGMENTS:
+        # Postgres uses a 1GiB segment size, fixed at compile time, so we must use >2GB of data
+        # to exercise multiple segments.
         target_relblock_size = int(((2.333 * 1024 * 1024 * 1024) // 8192) * 8192)
     else:
         raise ValueError
@@ -111,9 +113,15 @@ def test_pgdata_import_smoke(
 
     def validate_vanilla_equivalence(ep):
         # TODO: would be nicer to just compare pgdump
-        assert ep.safe_psql("select count(*), sum(data::bigint)::bigint from t") == [
-            (expect_nrows, expect_sum)
-        ]
+
+        # Enable IO concurrency for batching on large sequential scan, to avoid making
+        # this test unnecessarily onerous on CPU
+        assert ep.safe_psql_many(
+            [
+                "set effective_io_concurrency=32;",
+                "select count(*), sum(data::bigint)::bigint from t",
+            ]
+        ) == [[], [(expect_nrows, expect_sum)]]
 
     validate_vanilla_equivalence(vanilla_pg)