use a prod-like shared_buffers size for some perf unit tests (#11373)

## Problem

In Neon DBaaS we adjust shared_buffers to the size of the compute — more
precisely, we scale the maximum number of connections with the compute size,
and then derive the shared_buffers size from that maximum connection count,
yielding approximately the following sizes:
`2 CU: 225 MB; 4 CU: 450 MB; 8 CU: 900 MB`

[see](877e33b428/goapp/controlplane/internal/pkg/compute/computespec/pg_settings.go (L405))

## Summary of changes

We should run perf unit tests with settings that are realistic for a
paying customer, and select 8 CU as the reference for those tests.
This commit is contained in:
Peter Bendel
2025-04-02 12:43:05 +02:00
committed by GitHub
parent 7dc8370848
commit 4bc6dbdd5f
9 changed files with 43 additions and 7 deletions

View File

@@ -724,3 +724,20 @@ def skip_on_ci(reason: str):
os.getenv("CI", "false") == "true",
reason=reason,
)
def shared_buffers_for_max_cu(max_cu: float) -> str:
    """
    Return the shared_buffers setting (as a string, in 8 kB pages) for the
    given max CU, mirroring the production sizing formula.

    See https://github.com/neondatabase/cloud/blob/877e33b4289a471b8f0a35c84009846358f3e5a3/goapp/controlplane/internal/pkg/compute/computespec/pg_settings.go#L405
    e.g. // 2 CU: 225mb; 4 CU: 450mb; 8 CU: 900mb

    Args:
        max_cu: maximum compute-unit size of the endpoint (1 CU ~ 4 GB RAM).

    Returns:
        shared_buffers value as a string, expressed in 8 kB buffer pages
        (PostgreSQL's native unit when no suffix is given).
    """
    # 1 CU corresponds to 4 GB of RAM.
    ram_bytes = int(4096 * max_cu * 1024 * 1024)
    # Production formula: ~9.1 MB of RAM per connection, clamped to [100, 4000].
    # NOTE: integer floor division (//) is exact; the operands are always
    # positive so it matches truncating float division here.
    max_connections = max(100, min(ram_bytes // 9531392, 4000))
    max_worker_processes = 12 + int(max_cu * 2)
    # +1 accounts for the autovacuum launcher / extra system backend slot.
    max_backends = 1 + max_connections + max_worker_processes
    # 256 kB of shared_buffers per backend, rounded up to whole MB, min 128 MB.
    shared_buffers_mb = max(128, (1023 + max_backends * 256) // 1024)
    # Convert MB to 8 kB pages: 1 MB = 128 pages.
    shared_buffers_pages = shared_buffers_mb * 128
    return str(shared_buffers_pages)

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_last_flush_lsn
from fixtures.utils import shared_buffers_for_max_cu
#
@@ -20,7 +21,10 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
timeline_id = env.create_branch("test_bulk_update")
tenant_id = env.initial_tenant
endpoint = env.endpoints.create_start("test_bulk_update")
# use shared_buffers size like in production for 8 CU compute
endpoint = env.endpoints.create_start(
"test_bulk_update", config_lines=[f"shared_buffers={shared_buffers_for_max_cu(8.0)}"]
)
cur = endpoint.connect().cursor()
cur.execute("set statement_timeout=0")

View File

@@ -17,9 +17,10 @@ from fixtures.pageserver.utils import (
wait_for_upload_queue_empty,
)
from fixtures.remote_storage import s3_storage
from fixtures.utils import shared_buffers_for_max_cu
@pytest.mark.timeout(900)
@pytest.mark.timeout(1800)
@pytest.mark.parametrize("size", [8, 1024, 8192])
@pytest.mark.parametrize("s3", [True, False], ids=["s3", "local"])
@pytest.mark.parametrize("backpressure", [True, False], ids=["backpressure", "nobackpressure"])
@@ -60,6 +61,8 @@ def test_ingest_insert_bulk(
f"fsync = {fsync}",
"max_replication_apply_lag = 0",
f"max_replication_flush_lag = {'10GB' if backpressure else '0'}",
# use shared_buffers size like in production for 8 CU compute
f"shared_buffers={shared_buffers_for_max_cu(8.0)}",
# NB: neon_local defaults to 15MB, which is too slow -- production uses 500MB.
f"max_replication_write_lag = {'500MB' if backpressure else '0'}",
],

View File

@@ -12,7 +12,7 @@ from fixtures.neon_fixtures import (
from fixtures.pageserver.utils import wait_for_last_record_lsn
@pytest.mark.timeout(600)
@pytest.mark.timeout(1200)
@pytest.mark.parametrize("size", [1024, 8192, 131072])
@pytest.mark.parametrize("fsync", [True, False], ids=["fsync", "nofsync"])
def test_ingest_logical_message(

View File

@@ -7,6 +7,8 @@ from typing import TYPE_CHECKING
if TYPE_CHECKING:
from fixtures.neon_fixtures import Endpoint, NeonEnv
from fixtures.utils import shared_buffers_for_max_cu
async def repeat_bytes(buf, repetitions: int):
for _ in range(repetitions):
@@ -45,7 +47,10 @@ async def parallel_load_same_table(endpoint: Endpoint, n_parallel: int):
# Load data into one table with COPY TO from 5 parallel connections
def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5):
env = neon_simple_env
endpoint = env.endpoints.create_start("main")
# use shared_buffers size like in production for 8 CU compute
endpoint = env.endpoints.create_start(
"main", config_lines=[f"shared_buffers={shared_buffers_for_max_cu(8.0)}"]
)
# Create test table
conn = endpoint.connect()

View File

@@ -6,6 +6,7 @@ from fixtures.benchmark_fixture import NeonBenchmarker
from fixtures.compare_fixtures import RemoteCompare
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.utils import shared_buffers_for_max_cu
def get_num_relations(default: int = 1000) -> list[int]:
@@ -78,7 +79,8 @@ def test_perf_simple_many_relations_reldir_v2(
ep = env.endpoints.create_start(
"main",
config_lines=[
"shared_buffers=1000MB",
# use shared_buffers size like in production for 8 CU compute
f"shared_buffers={shared_buffers_for_max_cu(8.0)}",
"max_locks_per_transaction=16384",
],
)

View File

@@ -19,6 +19,7 @@ from fixtures.pageserver.http import (
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import MockS3Server, RemoteStorageKind
from fixtures.utils import shared_buffers_for_max_cu
from mypy_boto3_kms import KMSClient
from mypy_boto3_kms.type_defs import EncryptResponseTypeDef
from mypy_boto3_s3 import S3Client
@@ -80,7 +81,8 @@ def test_pgdata_import_smoke(
# doesn't allow any prefetching on v17 and above, where the new streaming
# read machinery keeps buffers pinned while prefetching them. Use a higher
# setting to enable prefetching and speed up the tests
ep_config = ["shared_buffers=64MB"]
# use shared_buffers size like in production for 8 CU compute
ep_config = [f"shared_buffers={shared_buffers_for_max_cu(8.0)}"]
#
# Put data in vanilla pg

View File

@@ -16,6 +16,7 @@ if TYPE_CHECKING:
from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
@pytest.mark.skip("See https://github.com/neondatabase/neon/issues/11395")
def test_pageserver_getpage_throttle(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
env = neon_env_builder.init_start()

View File

@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import wait_replica_caughtup
from fixtures.utils import shared_buffers_for_max_cu
if TYPE_CHECKING:
from fixtures.neon_fixtures import NeonEnv
@@ -180,7 +181,8 @@ def test_physical_replication_config_mismatch_too_many_known_xids(neon_simple_en
endpoint_id="primary",
config_lines=[
"max_connections=1000",
"shared_buffers=128MB", # prevent "no unpinned buffers available" error
# use shared_buffers size like in production for 2 CU compute
f"shared_buffers={shared_buffers_for_max_cu(2.0)}", # prevent "no unpinned buffers available" error
],
)
secondary = env.endpoints.new_replica_start(