From be7bd407c5907ee935491010daa33546757c99fa Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 5 Jul 2024 10:34:16 +0100 Subject: [PATCH] tests: make location_conf_churn more robust (#8271) ## Problem This test directly manages locations on pageservers and configuration of an endpoint. However, it did not switch off the parts of the storage controller that attempt to do the same: occasionally, the test would fail in a strange way such as a compute failing to accept a reconfiguration request. ## Summary of changes - Wire up the storage controller's compute notification hook to a no-op handler - Configure the tenant's scheduling policy to Stop. --- .../regress/test_pageserver_secondary.py | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index 4c828b86b0..0416078ebc 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -16,6 +16,8 @@ from fixtures.pageserver.utils import ( from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, S3Storage, s3_storage from fixtures.utils import wait_until from fixtures.workload import Workload +from werkzeug.wrappers.request import Request +from werkzeug.wrappers.response import Response # A tenant configuration that is convenient for generating uploads and deletions # without a large amount of postgres traffic. @@ -59,7 +61,7 @@ def evict_random_layers( @pytest.mark.parametrize("seed", [1, 2, 3]) -def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int): +def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, make_httpserver, seed: int): """ Issue many location configuration changes, ensure that tenants remain readable & we don't get any unexpected errors. We should @@ -73,6 +75,20 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int): neon_env_builder.enable_pageserver_remote_storage( remote_storage_kind=s3_storage(), ) + neon_env_builder.control_plane_compute_hook_api = ( + f"http://{make_httpserver.host}:{make_httpserver.port}/notify-attach" + ) + + def ignore_notify(request: Request): + # This test does all its own compute configuration (by passing explicit pageserver ID to Workload functions), + # so we send controller notifications to /dev/null to prevent it fighting the test for control of the compute. + log.info(f"Ignoring storage controller compute notification: {request.json}") + return Response(status=200) + + make_httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler( + ignore_notify + ) + env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF) pageservers = env.pageservers @@ -99,6 +115,15 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int): workload.init(env.pageservers[0].id) workload.write_rows(256, env.pageservers[0].id) + # Discourage the storage controller from interfering with the changes we will make directly on the pageserver + env.storage_controller.tenant_policy_update( + tenant_id, + { + "scheduling": "Stop", + }, + ) + env.storage_controller.allowed_errors.append(".*Scheduling is disabled by policy Stop.*") + # We use a fixed seed to make the test reproducible: we want a randomly # chosen order, but not to change the order every time we run the test. rng = random.Random(seed)