From 6bac7708116ecb82d457b150ef51bc0a330a2237 Mon Sep 17 00:00:00 2001
From: bojanserafimov
Date: Thu, 8 Jun 2023 18:11:33 -0400
Subject: [PATCH] Add cold start test (#4436)

---
 test_runner/performance/test_startup.py | 55 ++++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/test_runner/performance/test_startup.py b/test_runner/performance/test_startup.py
index fa2e058491..9c45088d62 100644
--- a/test_runner/performance/test_startup.py
+++ b/test_runner/performance/test_startup.py
@@ -1,10 +1,63 @@
 from contextlib import closing
 
 import pytest
-from fixtures.benchmark_fixture import NeonBenchmarker
+import requests
+from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
 from fixtures.neon_fixtures import NeonEnvBuilder
 
 
+# Just start and measure duration.
+#
+# This test runs pretty quickly and can be informative when used in combination
+# with emulated network delay. Some useful delay commands:
+#
+# 1. Add 2msec delay to all localhost traffic
+# `sudo tc qdisc add dev lo root handle 1:0 netem delay 2msec`
+#
+# 2. Test that it works (you should see 4ms ping)
+# `ping localhost`
+#
+# 3. Revert back to normal
+# `sudo tc qdisc del dev lo root netem`
+#
+# NOTE this test might not represent the real startup time because the basebackup
+# for a large database might be larger if there's a lot of transaction metadata,
+# or safekeepers might need more syncing, or there might be more operations to
+# apply during the config step, like more users, databases, or extensions. By default
+# we load the extensions 'neon,pg_stat_statements,timescaledb,pg_cron', but in this
+# test we only load neon.
+def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
+    neon_env_builder.num_safekeepers = 3
+    env = neon_env_builder.init_start()
+
+    env.neon_cli.create_branch("test_startup")
+
+    # We do two iterations so we can see if the second startup is faster. It should
+    # be because the compute node should already be configured with roles, databases,
+    # extensions, etc from the first run.
+    for i in range(2):
+        # Start
+        with zenbenchmark.record_duration(f"{i}_start_and_select"):
+            endpoint = env.endpoints.create_start("test_startup")
+            endpoint.safe_psql("select 1;")
+
+        # Get metrics
+        metrics = requests.get(f"http://localhost:{endpoint.http_port}/metrics.json").json()
+        durations = {
+            "wait_for_spec_ms": f"{i}_wait_for_spec",
+            "sync_safekeepers_ms": f"{i}_sync_safekeepers",
+            "basebackup_ms": f"{i}_basebackup",
+            "config_ms": f"{i}_config",
+            "total_startup_ms": f"{i}_total_startup",
+        }
+        for key, name in durations.items():
+            value = metrics[key]
+            zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)
+
+        # Stop so we can restart
+        endpoint.stop()
+
+
 # This test sometimes runs for longer than the global 5 minute timeout.
 @pytest.mark.timeout(600)
 def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
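
The two iterations in test_startup_simple exist so the cold start (the "0_*" metrics) can be compared against the warm restart (the "1_*" metrics), where roles, databases, and extensions are already configured. A minimal sketch of that comparison, using made-up numbers in place of the values the test records from the compute node's /metrics.json:

# Illustrative results, keyed by the names passed to zenbenchmark.record();
# the numbers are hypothetical stand-ins, not measurements from this patch.
results = {
    "0_total_startup": 950.0,  # first (cold) start, ms
    "1_total_startup": 310.0,  # second (warm) start, ms
}

cold, warm = results["0_total_startup"], results["1_total_startup"]
print(f"second startup is {cold / warm:.1f}x faster than the first")

In a real run the same ratio can be read directly off the recorded "0_total_startup" and "1_total_startup" benchmark values.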