diff --git a/test_runner/performance/test_lazy_startup.py b/test_runner/performance/test_lazy_startup.py index 1a431e272e..e929bd4d05 100644 --- a/test_runner/performance/test_lazy_startup.py +++ b/test_runner/performance/test_lazy_startup.py @@ -26,86 +26,81 @@ from fixtures.neon_fixtures import NeonEnvBuilder # apply during config step, like more users, databases, or extensions. By default # we load extensions 'neon,pg_stat_statements,timescaledb,pg_cron', but in this # test we only load neon. -@pytest.mark.timeout(1000) -def test_lazy_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker): +@pytest.mark.timeout(1800) +@pytest.mark.parametrize("slru", ["lazy", "eager"]) +def test_lazy_startup(slru: str, neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - lazy_tenant, _ = env.neon_cli.create_tenant( + lazy_slru_download = "true" if slru == "lazy" else "false" + tenant, _ = env.neon_cli.create_tenant( conf={ - "lazy_slru_download": "true", + "lazy_slru_download": lazy_slru_download, } ) - eager_tenant, _ = env.neon_cli.create_tenant( - conf={ - "lazy_slru_download": "false", - } - ) - tenants = [lazy_tenant, eager_tenant] - slru = "lazy" - for tenant in tenants: - endpoint = env.endpoints.create_start("main", tenant_id=tenant) - endpoint.safe_psql("CREATE TABLE t (pk integer PRIMARY KEY, x integer)") - endpoint.safe_psql("ALTER TABLE t SET (autovacuum_enabled = false)") - endpoint.safe_psql("INSERT INTO t VALUES (1, 0)") - endpoint.safe_psql( - """ - CREATE PROCEDURE updating() as - $$ - DECLARE - i integer; - BEGIN - FOR i IN 1..10000000 LOOP - UPDATE t SET x = x + 1 WHERE pk=1; - COMMIT; - END LOOP; - END - $$ LANGUAGE plpgsql - """ - ) - endpoint.safe_psql("SET statement_timeout=0") - endpoint.safe_psql("call updating()") + endpoint = env.endpoints.create_start("main", tenant_id=tenant) + with endpoint.cursor() as cur: + cur.execute("CREATE TABLE t (pk integer PRIMARY KEY, x integer)") + cur.execute("ALTER TABLE t SET (autovacuum_enabled = false)") + cur.execute("INSERT INTO t VALUES (1, 0)") + cur.execute( + """ + CREATE PROCEDURE updating() as + $$ + DECLARE + i integer; + BEGIN + FOR i IN 1..1000000 LOOP + UPDATE t SET x = x + 1 WHERE pk=1; + COMMIT; + END LOOP; + END + $$ LANGUAGE plpgsql + """ + ) + cur.execute("SET statement_timeout=0") + cur.execute("call updating()") + + endpoint.stop() + + # We do two iterations so we can see if the second startup is faster. It should + # be because the compute node should already be configured with roles, databases, + # extensions, etc from the first run. + for i in range(2): + # Start + with zenbenchmark.record_duration(f"{slru}_{i}_start"): + endpoint.start() + + with zenbenchmark.record_duration(f"{slru}_{i}_select"): + sum = endpoint.safe_psql("select sum(x) from t")[0][0] + assert sum == 1000000 + + # Get metrics + metrics = requests.get(f"http://localhost:{endpoint.http_port}/metrics.json").json() + durations = { + "wait_for_spec_ms": f"{slru}_{i}_wait_for_spec", + "sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers", + "sync_sk_check_ms": f"{slru}_{i}_sync_sk_check", + "basebackup_ms": f"{slru}_{i}_basebackup", + "start_postgres_ms": f"{slru}_{i}_start_postgres", + "config_ms": f"{slru}_{i}_config", + "total_startup_ms": f"{slru}_{i}_total_startup", + } + for key, name in durations.items(): + value = metrics[key] + zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER) + + basebackup_bytes = metrics["basebackup_bytes"] + zenbenchmark.record( + f"{slru}_{i}_basebackup_bytes", + basebackup_bytes, + "bytes", + report=MetricReport.LOWER_IS_BETTER, + ) + + # Stop so we can restart endpoint.stop() - # We do two iterations so we can see if the second startup is faster. It should - # be because the compute node should already be configured with roles, databases, - # extensions, etc from the first run. - for i in range(2): - # Start - with zenbenchmark.record_duration(f"{slru}_{i}_start"): - endpoint.start() - - with zenbenchmark.record_duration(f"{slru}_{i}_select"): - sum = endpoint.safe_psql("select sum(x) from t")[0][0] - assert sum == 10000000 - - # Get metrics - metrics = requests.get(f"http://localhost:{endpoint.http_port}/metrics.json").json() - durations = { - "wait_for_spec_ms": f"{slru}_{i}_wait_for_spec", - "sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers", - "sync_sk_check_ms": f"{slru}_{i}_sync_sk_check", - "basebackup_ms": f"{slru}_{i}_basebackup", - "start_postgres_ms": f"{slru}_{i}_start_postgres", - "config_ms": f"{slru}_{i}_config", - "total_startup_ms": f"{slru}_{i}_total_startup", - } - for key, name in durations.items(): - value = metrics[key] - zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER) - - basebackup_bytes = metrics["basebackup_bytes"] - zenbenchmark.record( - f"{slru}_{i}_basebackup_bytes", - basebackup_bytes, - "bytes", - report=MetricReport.LOWER_IS_BETTER, - ) - - # Stop so we can restart - endpoint.stop() - - # Imitate optimizations that console would do for the second start - endpoint.respec(skip_pg_catalog_updates=True) - slru = "eager" + # Imitate optimizations that console would do for the second start + endpoint.respec(skip_pg_catalog_updates=True)