use a prod-like shared_buffers size for some perf unit tests (#11373)

## Problem

In Neon DBaaS we adjust shared_buffers to the size of the compute — more
precisely, we scale the maximum number of connections with the compute size,
and then derive the shared_buffers size from that maximum connection count,
yielding approximately the following sizes:
`2 CU: 225 MB; 4 CU: 450 MB; 8 CU: 900 MB`

[see](877e33b428/goapp/controlplane/internal/pkg/compute/computespec/pg_settings.go (L405))

## Summary of changes

We should run perf unit tests with settings that are realistic for a
paying customer, and select 8 CU as the reference for those tests.
This commit is contained in:
Peter Bendel
2025-04-02 12:43:05 +02:00
committed by GitHub
parent 7dc8370848
commit 4bc6dbdd5f
9 changed files with 43 additions and 7 deletions

View File

@@ -724,3 +724,20 @@ def skip_on_ci(reason: str):
os.getenv("CI", "false") == "true",
reason=reason,
)
def shared_buffers_for_max_cu(max_cu: float) -> str:
    """
    Return the shared_buffers setting (as a string, in 8 kB pages) for the
    given max CU, mirroring the production sizing formula.

    See https://github.com/neondatabase/cloud/blob/877e33b4289a471b8f0a35c84009846358f3e5a3/goapp/controlplane/internal/pkg/compute/computespec/pg_settings.go#L405
    e.g. // 2 CU: 225mb; 4 CU: 450mb; 8 CU: 900mb

    Args:
        max_cu: maximum compute-unit size of the endpoint (1 CU ~ 4 GB RAM).

    Returns:
        shared_buffers value as a string, expressed in 8 kB buffer pages
        (PostgreSQL's native unit when no suffix is given).
    """
    # 1 CU corresponds to 4 GB of RAM.
    ram_bytes = int(4096 * max_cu * 1024 * 1024)
    # Production formula: ~9.1 MB of RAM per connection, clamped to [100, 4000].
    # NOTE: integer floor division (//) is exact; the operands are always
    # positive so it matches truncating float division here.
    max_connections = max(100, min(ram_bytes // 9531392, 4000))
    max_worker_processes = 12 + int(max_cu * 2)
    # +1 accounts for the autovacuum launcher / extra system backend slot.
    max_backends = 1 + max_connections + max_worker_processes
    # 256 kB of shared_buffers per backend, rounded up to whole MB, min 128 MB.
    shared_buffers_mb = max(128, (1023 + max_backends * 256) // 1024)
    # Convert MB to 8 kB pages: 1 MB = 128 pages.
    shared_buffers_pages = shared_buffers_mb * 128
    return str(shared_buffers_pages)

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_last_flush_lsn
from fixtures.utils import shared_buffers_for_max_cu
#
@@ -20,7 +21,10 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
timeline_id = env.create_branch("test_bulk_update")
tenant_id = env.initial_tenant
endpoint = env.endpoints.create_start("test_bulk_update")
# use shared_buffers size like in production for 8 CU compute
endpoint = env.endpoints.create_start(
"test_bulk_update", config_lines=[f"shared_buffers={shared_buffers_for_max_cu(8.0)}"]
)
cur = endpoint.connect().cursor()
cur.execute("set statement_timeout=0")

View File

@@ -17,9 +17,10 @@ from fixtures.pageserver.utils import (
wait_for_upload_queue_empty,
)
from fixtures.remote_storage import s3_storage
from fixtures.utils import shared_buffers_for_max_cu
@pytest.mark.timeout(900)
@pytest.mark.timeout(1800)
@pytest.mark.parametrize("size", [8, 1024, 8192])
@pytest.mark.parametrize("s3", [True, False], ids=["s3", "local"])
@pytest.mark.parametrize("backpressure", [True, False], ids=["backpressure", "nobackpressure"])
@@ -60,6 +61,8 @@ def test_ingest_insert_bulk(
f"fsync = {fsync}",
"max_replication_apply_lag = 0",
f"max_replication_flush_lag = {'10GB' if backpressure else '0'}",
# use shared_buffers size like in production for 8 CU compute
f"shared_buffers={shared_buffers_for_max_cu(8.0)}",
# NB: neon_local defaults to 15MB, which is too slow -- production uses 500MB.
f"max_replication_write_lag = {'500MB' if backpressure else '0'}",
],

View File

@@ -12,7 +12,7 @@ from fixtures.neon_fixtures import (
from fixtures.pageserver.utils import wait_for_last_record_lsn
@pytest.mark.timeout(600)
@pytest.mark.timeout(1200)
@pytest.mark.parametrize("size", [1024, 8192, 131072])
@pytest.mark.parametrize("fsync", [True, False], ids=["fsync", "nofsync"])
def test_ingest_logical_message(

View File

@@ -7,6 +7,8 @@ from typing import TYPE_CHECKING
if TYPE_CHECKING:
from fixtures.neon_fixtures import Endpoint, NeonEnv
from fixtures.utils import shared_buffers_for_max_cu
async def repeat_bytes(buf, repetitions: int):
for _ in range(repetitions):
@@ -45,7 +47,10 @@ async def parallel_load_same_table(endpoint: Endpoint, n_parallel: int):
# Load data into one table with COPY TO from 5 parallel connections
def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5):
env = neon_simple_env
endpoint = env.endpoints.create_start("main")
# use shared_buffers size like in production for 8 CU compute
endpoint = env.endpoints.create_start(
"main", config_lines=[f"shared_buffers={shared_buffers_for_max_cu(8.0)}"]
)
# Create test table
conn = endpoint.connect()

View File

@@ -6,6 +6,7 @@ from fixtures.benchmark_fixture import NeonBenchmarker
from fixtures.compare_fixtures import RemoteCompare
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.utils import shared_buffers_for_max_cu
def get_num_relations(default: int = 1000) -> list[int]:
@@ -78,7 +79,8 @@ def test_perf_simple_many_relations_reldir_v2(
ep = env.endpoints.create_start(
"main",
config_lines=[
"shared_buffers=1000MB",
# use shared_buffers size like in production for 8 CU compute
f"shared_buffers={shared_buffers_for_max_cu(8.0)}",
"max_locks_per_transaction=16384",
],
)

View File

@@ -19,6 +19,7 @@ from fixtures.pageserver.http import (
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import MockS3Server, RemoteStorageKind
from fixtures.utils import shared_buffers_for_max_cu
from mypy_boto3_kms import KMSClient
from mypy_boto3_kms.type_defs import EncryptResponseTypeDef
from mypy_boto3_s3 import S3Client
@@ -80,7 +81,8 @@ def test_pgdata_import_smoke(
# doesn't allow any prefetching on v17 and above, where the new streaming
# read machinery keeps buffers pinned while prefetching them. Use a higher
# setting to enable prefetching and speed up the tests
ep_config = ["shared_buffers=64MB"]
# use shared_buffers size like in production for 8 CU compute
ep_config = [f"shared_buffers={shared_buffers_for_max_cu(8.0)}"]
#
# Put data in vanilla pg

View File

@@ -16,6 +16,7 @@ if TYPE_CHECKING:
from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
@pytest.mark.skip("See https://github.com/neondatabase/neon/issues/11395")
def test_pageserver_getpage_throttle(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
env = neon_env_builder.init_start()

View File

@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import wait_replica_caughtup
from fixtures.utils import shared_buffers_for_max_cu
if TYPE_CHECKING:
from fixtures.neon_fixtures import NeonEnv
@@ -180,7 +181,8 @@ def test_physical_replication_config_mismatch_too_many_known_xids(neon_simple_en
endpoint_id="primary",
config_lines=[
"max_connections=1000",
"shared_buffers=128MB", # prevent "no unpinned buffers available" error
# use shared_buffers size like in production for 2 CU compute
f"shared_buffers={shared_buffers_for_max_cu(2.0)}", # prevent "no unpinned buffers available" error
],
)
secondary = env.endpoints.new_replica_start(