From 65042cbadd0426c43499bb7675e671b5c6e980e9 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Thu, 19 Dec 2024 10:58:49 +0000
Subject: [PATCH] tests: use high IO concurrency in `test_pgdata_import_smoke`,
 use `effective_io_concurrency=2` in tests by default (#10114)

## Problem

`test_pgdata_import_smoke` writes two gigabytes of pages and then reads
them back serially. This is CPU bottlenecked and results in a long
runtime, and sensitivity to CPU load from other tests on the same
machine.

Closes: https://github.com/neondatabase/neon/issues/10071

## Summary of changes

- Use effective_io_concurrency=32 when doing sequential scans through
2GiB of pages in test_pgdata_import_smoke. This is a ~10x runtime
decrease in the parts of the test that do sequential scans.
- Also set `effective_io_concurrency=2` for tests, as I noticed while
debugging that we were doing all getpage requests serially, which is bad
for checking the stability of the batching code.
---
 control_plane/src/endpoint.rs             |  4 ++++
 test_runner/regress/test_import_pgdata.py | 14 +++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs
index 1fdf326051..5ebf842813 100644
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -316,6 +316,10 @@ impl Endpoint {
         // and can cause errors like 'no unpinned buffers available', see
         //
         conf.append("shared_buffers", "1MB");
+        // Postgres defaults to effective_io_concurrency=1, which does not exercise the pageserver's
+        // batching logic.  Set this to 2 so that we exercise the code a bit without letting
+        // individual tests do a lot of concurrent work on underpowered test machines
+        conf.append("effective_io_concurrency", "2");
         conf.append("fsync", "off");
         conf.append("max_connections", "100");
         conf.append("wal_level", "logical");
diff --git a/test_runner/regress/test_import_pgdata.py b/test_runner/regress/test_import_pgdata.py
index 29229b73c1..6ea2393a9d 100644
--- a/test_runner/regress/test_import_pgdata.py
+++ b/test_runner/regress/test_import_pgdata.py
@@ -84,6 +84,8 @@ def test_pgdata_import_smoke(
     elif rel_block_size == RelBlockSize.TWO_STRPES_PER_SHARD:
         target_relblock_size = (shard_count or 1) * stripe_size * 8192 * 2
     elif rel_block_size == RelBlockSize.MULTIPLE_RELATION_SEGMENTS:
+        # Postgres uses a 1GiB segment size, fixed at compile time, so we must use >2GB of data
+        # to exercise multiple segments.
         target_relblock_size = int(((2.333 * 1024 * 1024 * 1024) // 8192) * 8192)
     else:
         raise ValueError
@@ -111,9 +113,15 @@ def test_pgdata_import_smoke(
 
     def validate_vanilla_equivalence(ep):
         # TODO: would be nicer to just compare pgdump
-        assert ep.safe_psql("select count(*), sum(data::bigint)::bigint from t") == [
-            (expect_nrows, expect_sum)
-        ]
+
+        # Enable IO concurrency for batching on large sequential scan, to avoid making
+        # this test unnecessarily onerous on CPU
+        assert ep.safe_psql_many(
+            [
+                "set effective_io_concurrency=32;",
+                "select count(*), sum(data::bigint)::bigint from t",
+            ]
+        ) == [[], [(expect_nrows, expect_sum)]]
 
     validate_vanilla_equivalence(vanilla_pg)