tests: make location_conf_churn more robust (#8271)

## Problem

This test directly manages locations on pageservers and the configuration
of an endpoint. However, it did not switch off the parts of the storage
controller that attempt to do the same, so the controller could fight the
test for control: occasionally, the test would fail in a strange way, such
as a compute failing to accept a reconfiguration request.

## Summary of changes

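- Wire up the storage controller's compute notification hook to a no-op
handler, so that the controller does not fight the test for control of the
compute.
- Set the tenant's scheduling policy to Stop, to discourage the storage
controller from interfering with the changes the test makes directly on
the pageservers.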
This commit is contained in:
John Spray
2024-07-05 10:34:16 +01:00
committed by Vlad Lazar
parent 958abaffe4
commit be7bd407c5

View File

@@ -16,6 +16,8 @@ from fixtures.pageserver.utils import (
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, S3Storage, s3_storage
from fixtures.utils import wait_until
from fixtures.workload import Workload
from werkzeug.wrappers.request import Request
from werkzeug.wrappers.response import Response
# A tenant configuration that is convenient for generating uploads and deletions
# without a large amount of postgres traffic.
@@ -59,7 +61,7 @@ def evict_random_layers(
@pytest.mark.parametrize("seed", [1, 2, 3])
def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, make_httpserver, seed: int):
"""
Issue many location configuration changes, ensure that tenants
remain readable & we don't get any unexpected errors. We should
@@ -73,6 +75,20 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
neon_env_builder.enable_pageserver_remote_storage(
remote_storage_kind=s3_storage(),
)
neon_env_builder.control_plane_compute_hook_api = (
f"http://{make_httpserver.host}:{make_httpserver.port}/notify-attach"
)
def ignore_notify(request: Request):
    """
    Acknowledge a storage controller compute notification without acting on it.

    This test does all its own compute configuration (by passing explicit
    pageserver ID to Workload functions), so controller notifications are
    sent to /dev/null: the payload is logged and a 200 response is returned,
    which prevents the controller fighting the test for control of the compute.
    """
    log.info(f"Ignoring storage controller compute notification: {request.json}")
    acknowledgement = Response(status=200)
    return acknowledgement
make_httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler(
ignore_notify
)
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
pageservers = env.pageservers
@@ -99,6 +115,15 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
workload.init(env.pageservers[0].id)
workload.write_rows(256, env.pageservers[0].id)
# Discourage the storage controller from interfering with the changes we will make directly on the pageserver
env.storage_controller.tenant_policy_update(
tenant_id,
{
"scheduling": "Stop",
},
)
env.storage_controller.allowed_errors.append(".*Scheduling is disabled by policy Stop.*")
# We use a fixed seed to make the test reproducible: we want a randomly
# chosen order, but not to change the order every time we run the test.
rng = random.Random(seed)