tests: use high IO concurrency in test_pgdata_import_smoke, use effective_io_concurrency=2 in tests by default (#10114)

## Problem

`test_pgdata_import_smoke` writes two gigabytes of pages and then reads
them back serially. This is CPU-bottlenecked, which both lengthens the
test's runtime and makes it sensitive to CPU load from other tests on the
same machine.

Closes: https://github.com/neondatabase/neon/issues/10071

## Summary of changes

- Use `effective_io_concurrency=32` when doing sequential scans through
2GiB of pages in `test_pgdata_import_smoke`. This yields a ~10x runtime
decrease in the parts of the test that do sequential scans (a short
sketch of the pattern follows this list).
- Also set `effective_io_concurrency=2` for tests by default: while
debugging I noticed that we were doing all getpage requests serially,
which is bad for checking the stability of the batching code.
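
For context, `effective_io_concurrency` is an ordinary Postgres setting, so a single session can raise it without changing the endpoint-wide default. A minimal sketch of the pattern this change applies, assuming a running test endpoint `ep` that exposes the `safe_psql_many` helper used in the diff below:

```python
# Sketch only: `ep` stands for a running test endpoint; safe_psql_many
# runs each statement in one session and returns one result set per
# statement.
results = ep.safe_psql_many(
    [
        # Session-level override; the endpoint-wide default stays at 2.
        "set effective_io_concurrency=32;",
        "select count(*) from t",
    ]
)
set_result, select_result = results
assert set_result == []  # SET produces no rows
```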
Author: John Spray
Date: 2024-12-19 10:58:49 +00:00
Committed by: GitHub
Parent: b135194090
Commit: 65042cbadd
2 changed files with 15 additions and 3 deletions

```diff
@@ -316,6 +316,10 @@ impl Endpoint {
             // and can cause errors like 'no unpinned buffers available', see
             // <https://github.com/neondatabase/neon/issues/9956>
             conf.append("shared_buffers", "1MB");
+            // Postgres defaults to effective_io_concurrency=1, which does not exercise the pageserver's
+            // batching logic. Set this to 2 so that we exercise the code a bit without letting
+            // individual tests do a lot of concurrent work on underpowered test machines
+            conf.append("effective_io_concurrency", "2");
             conf.append("fsync", "off");
             conf.append("max_connections", "100");
             conf.append("wal_level", "logical");
```

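For illustration, a quick hypothetical check (not part of the commit) that an endpoint created with this configuration actually reports the new default; it assumes the same `safe_psql` helper seen in the test diff below, which returns rows as tuples of strings:

```python
# Assumes `ep` is an endpoint started by the test harness after this
# change; SHOW returns the setting as a single-row, single-column result.
assert ep.safe_psql("SHOW effective_io_concurrency") == [("2",)]
```
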
```diff
@@ -84,6 +84,8 @@ def test_pgdata_import_smoke(
     elif rel_block_size == RelBlockSize.TWO_STRPES_PER_SHARD:
         target_relblock_size = (shard_count or 1) * stripe_size * 8192 * 2
     elif rel_block_size == RelBlockSize.MULTIPLE_RELATION_SEGMENTS:
+        # Postgres uses a 1GiB segment size, fixed at compile time, so we must use >2GB of data
+        # to exercise multiple segments.
         target_relblock_size = int(((2.333 * 1024 * 1024 * 1024) // 8192) * 8192)
     else:
         raise ValueError
```
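
The arithmetic in the `MULTIPLE_RELATION_SEGMENTS` branch is worth unpacking: it picks roughly 2.333 GiB, rounded down to a whole number of 8 KiB pages, so the relation spills across three 1 GiB segment files. A standalone check of the computation:

```python
PAGE = 8192
GIB = 1024 * 1024 * 1024

target = int(((2.333 * GIB) // PAGE) * PAGE)
assert target == 2505031680  # ~2.33 GiB
assert target % PAGE == 0    # a whole number of pages
assert target > 2 * GIB      # crosses two segment boundaries, i.e.
                             # occupies three 1 GiB segment files
```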
```diff
@@ -111,9 +113,15 @@
     def validate_vanilla_equivalence(ep):
         # TODO: would be nicer to just compare pgdump
-        assert ep.safe_psql("select count(*), sum(data::bigint)::bigint from t") == [
-            (expect_nrows, expect_sum)
-        ]
+        # Enable IO concurrency for batching on large sequential scan, to avoid making
+        # this test unnecessarily onerous on CPU
+        assert ep.safe_psql_many(
+            [
+                "set effective_io_concurrency=32;",
+                "select count(*), sum(data::bigint)::bigint from t",
+            ]
+        ) == [[], [(expect_nrows, expect_sum)]]

     validate_vanilla_equivalence(vanilla_pg)
```
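
For completeness, a hedged sketch of how such a helper is typically reused over the course of the test; the post-import call sites are outside this hunk, and the endpoint name `ep` here is hypothetical:

```python
validate_vanilla_equivalence(vanilla_pg)  # baseline: plain Postgres
# ... import the pgdata into the pageserver, start a Neon endpoint `ep` ...
validate_vanilla_equivalence(ep)  # imported data must report the same
                                  # count and sum as vanilla Postgres
```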