From bbe4dfa99154b679371e0bdfa9d648d1ebdae2ee Mon Sep 17 00:00:00 2001
From: Erik Grinaker
Date: Tue, 3 Dec 2024 15:33:31 +0100
Subject: [PATCH] test_runner: use immediate shutdown in `test_sharded_ingest` (#9984)

## Problem

`test_sharded_ingest` ingests a lot of data, which can make shutdown slow, e.g. due to local "S3 uploads" or compactions. This can cause test flakes during teardown.

Resolves #9740.

## Summary of changes

Perform an immediate shutdown of the cluster.
---
 test_runner/performance/test_sharded_ingest.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/test_runner/performance/test_sharded_ingest.py b/test_runner/performance/test_sharded_ingest.py
index 4c21e799c8..94fd54bade 100644
--- a/test_runner/performance/test_sharded_ingest.py
+++ b/test_runner/performance/test_sharded_ingest.py
@@ -90,6 +90,7 @@ def test_sharded_ingest(
     # Start the endpoint.
     endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
     start_lsn = Lsn(endpoint.safe_psql("select pg_current_wal_lsn()")[0][0])
+
     # Ingest data and measure WAL volume and duration.
     with closing(endpoint.connect()) as conn:
         with conn.cursor() as cur:
@@ -104,6 +105,8 @@

     wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
     end_lsn = Lsn(endpoint.safe_psql("select pg_current_wal_lsn()")[0][0])
+
+    # Record metrics.
     wal_written_mb = round((end_lsn - start_lsn) / (1024 * 1024))
     zenbenchmark.record("wal_written", wal_written_mb, "MB", MetricReport.TEST_PARAM)

@@ -152,3 +155,7 @@
     log.info(f"WAL ingested by each pageserver {ingested_by_ps}")

     assert tenant_get_shards(env, tenant_id) == shards, "shards moved"
+
+    # The pageservers can take a long time to shut down gracefully, presumably due to the upload
+    # queue or compactions or something. Just stop them immediately, we don't care.
+    env.stop(immediate=True)