diff --git a/test_runner/performance/test_compute_startup.py b/test_runner/performance/test_compute_startup.py
new file mode 100644
index 0000000000..d002d2e221
--- /dev/null
+++ b/test_runner/performance/test_compute_startup.py
@@ -0,0 +1,240 @@
+from __future__ import annotations
+
+import pytest
+import requests
+from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
+from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
+
+
+# Just start and measure duration.
+#
+# This test runs pretty quickly and can be informative when used in combination
+# with emulated network delay. Some useful delay commands:
+#
+# 1. Add 2msec delay to all localhost traffic
+# `sudo tc qdisc add dev lo root handle 1:0 netem delay 2msec`
+#
+# 2. Test that it works (you should see 4ms ping)
+# `ping localhost`
+#
+# 3. Revert back to normal
+# `sudo tc qdisc del dev lo root netem`
+#
+# NOTE this test might not represent the real startup time because the basebackup
+#      for a large database might be larger if there's a lot of transaction metadata,
+#      or safekeepers might need more syncing, or there might be more operations to
+#      apply during config step, like more users, databases, or extensions. By default
+#      we load extensions 'neon,pg_stat_statements,timescaledb,pg_cron', but in this
+#      test we only load neon.
+def test_compute_startup_simple(
+    neon_env_builder: NeonEnvBuilder,
+    zenbenchmark: NeonBenchmarker,
+):
+    neon_env_builder.num_safekeepers = 3
+    env = neon_env_builder.init_start()
+
+    env.create_branch("test_startup")
+
+    endpoint = None
+
+    # We do two iterations so we can compare the first start (pg_catalog updates
+    # not skipped) with a restart (pg_catalog updates skipped). The second start
+    # is expected to be faster since roles, databases, extensions, etc. already exist.
+    for i in range(2):
+        # Start: create the endpoint on the first iteration, restart it afterwards
+        with zenbenchmark.record_duration(f"{i}_start_and_select"):
+            if endpoint:
+                endpoint.start()
+            else:
+                endpoint = env.endpoints.create(
+                    "test_startup",
+                    # Shared buffers need to be allocated during startup, so they
+                    # impact startup time. This is the default value we use for
+                    # 1CPU pods (maybe different for VMs).
+                    #
+                    # TODO extensions also contribute to shared memory allocation,
+                    #      and this test doesn't include all default extensions we
+                    #      load.
+                    config_lines=["shared_buffers=262144"],
+                )
+                # Do not skip pg_catalog updates at first start, i.e.
+                # imitate 'the first start after project creation'.
+                endpoint.respec(skip_pg_catalog_updates=False)
+                endpoint.start()
+            endpoint.safe_psql("select 1;")
+
+        # Get startup metrics from the endpoint's external HTTP port
+        metrics = requests.get(
+            f"http://localhost:{endpoint.external_http_port}/metrics.json"
+        ).json()
+        durations = {
+            "wait_for_spec_ms": f"{i}_wait_for_spec",
+            "sync_safekeepers_ms": f"{i}_sync_safekeepers",
+            "sync_sk_check_ms": f"{i}_sync_sk_check",
+            "basebackup_ms": f"{i}_basebackup",
+            "start_postgres_ms": f"{i}_start_postgres",
+            "config_ms": f"{i}_config",
+            "total_startup_ms": f"{i}_total_startup",
+        }
+        for key, name in durations.items():
+            value = metrics[key]
+            zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)
+
+        # Check basebackup size makes sense (asserted only for the restart, i > 0)
+        basebackup_bytes = metrics["basebackup_bytes"]
+        if i > 0:
+            assert basebackup_bytes < 100 * 1024
+
+        # Stop so we can restart
+        endpoint.stop()
+
+        # Imitate optimizations that console would do for the second start
+        endpoint.respec(skip_pg_catalog_updates=True)
+
+
+# Start and measure duration with huge SLRU segments.
+# This test is similar to test_compute_startup_simple, but it creates a huge number of transactions
+# and records containing these XIDs. Autovacuum is disabled for the table to prevent CLOG truncation.
+# TODO: this is a very suspicious test, I doubt that it does what it's supposed to do,
+# e.g. these two starts do not make much sense. Looks like it's just copy-paste.
+# To be fixed within https://github.com/neondatabase/cloud/issues/8673
+@pytest.mark.timeout(1800)
+@pytest.mark.parametrize("slru", ["lazy", "eager"])
+def test_compute_ondemand_slru_startup(
+    slru: str, neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker
+):
+    neon_env_builder.num_safekeepers = 3
+    env = neon_env_builder.init_start()
+
+    lazy_slru_download = "true" if slru == "lazy" else "false"
+    tenant, _ = env.create_tenant(
+        conf={
+            "lazy_slru_download": lazy_slru_download,
+        }
+    )
+
+    endpoint = env.endpoints.create_start("main", tenant_id=tenant)
+    with endpoint.cursor() as cur:
+        cur.execute("CREATE TABLE t (pk integer PRIMARY KEY, x integer)")
+        cur.execute("ALTER TABLE t SET (autovacuum_enabled = false)")
+        cur.execute("INSERT INTO t VALUES (1, 0)")
+        cur.execute(
+            """
+            CREATE PROCEDURE updating() as
+            $$
+                DECLARE
+                i integer;
+                BEGIN
+                FOR i IN 1..1000000 LOOP
+                    UPDATE t SET x = x + 1 WHERE pk=1;
+                    COMMIT;
+                END LOOP;
+                END
+            $$ LANGUAGE plpgsql
+            """
+        )
+        cur.execute("SET statement_timeout=0")
+        cur.execute("call updating()")  # 1,000,000 single-row updates, one COMMIT each
+
+    endpoint.stop()
+
+    # We do two iterations so we can see if the second startup is faster. It should
+    # be because the compute node should already be configured with roles, databases,
+    # extensions, etc from the first run.
+    for i in range(2):
+        # Start
+        with zenbenchmark.record_duration(f"{slru}_{i}_start"):
+            endpoint.start()
+
+        with zenbenchmark.record_duration(f"{slru}_{i}_select"):
+            total = endpoint.safe_psql("select sum(x) from t")[0][0]
+            assert total == 1000000  # x starts at 0 and is incremented once per loop pass
+
+        # Get startup metrics from the endpoint's external HTTP port
+        metrics = requests.get(
+            f"http://localhost:{endpoint.external_http_port}/metrics.json"
+        ).json()
+        durations = {
+            "wait_for_spec_ms": f"{slru}_{i}_wait_for_spec",
+            "sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers",
+            "sync_sk_check_ms": f"{slru}_{i}_sync_sk_check",
+            "basebackup_ms": f"{slru}_{i}_basebackup",
+            "start_postgres_ms": f"{slru}_{i}_start_postgres",
+            "config_ms": f"{slru}_{i}_config",
+            "total_startup_ms": f"{slru}_{i}_total_startup",
+        }
+        for key, name in durations.items():
+            value = metrics[key]
+            zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)
+
+        basebackup_bytes = metrics["basebackup_bytes"]
+        zenbenchmark.record(
+            f"{slru}_{i}_basebackup_bytes",
+            basebackup_bytes,
+            "bytes",
+            report=MetricReport.LOWER_IS_BETTER,
+        )
+
+        # Stop so we can restart
+        endpoint.stop()
+
+        # Imitate optimizations that console would do for the second start
+        endpoint.respec(skip_pg_catalog_updates=True)
+
+
+@pytest.mark.timeout(240)
+def test_compute_startup_latency(
+    neon_env_builder: NeonEnvBuilder,
+    pg_bin: PgBin,
+    zenbenchmark: NeonBenchmarker,
+):
+    """
+    Do NUM_STARTS 'optimized' starts, i.e. with pg_catalog updates skipped,
+    and measure the duration of each step. Report p50, p90, p99 latencies.
+    """
+    neon_env_builder.num_safekeepers = 3
+    env = neon_env_builder.init_start()
+
+    endpoint = env.endpoints.create_start("main")
+    pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s4", endpoint.connstr()])
+    endpoint.stop()
+
+    NUM_STARTS = 100
+
+    durations: dict[str, list[int]] = {
+        "sync_sk_check_ms": [],
+        "sync_safekeepers_ms": [],
+        "basebackup_ms": [],
+        "start_postgres_ms": [],
+        "total_startup_ms": [],
+    }
+
+    for _i in range(NUM_STARTS):  # each pass: start, scrape metrics, stop
+        endpoint.start()
+        client = endpoint.http_client()
+        metrics = client.metrics_json()
+        for key in durations.keys():
+            value = metrics[key]
+            durations[key].append(value)
+        endpoint.stop()
+
+    for key in durations.keys():
+        durations[key] = sorted(durations[key])  # ascending, so percentiles are index lookups
+        zenbenchmark.record(
+            f"{key}_p50",
+            durations[key][len(durations[key]) // 2],
+            "ms",
+            report=MetricReport.LOWER_IS_BETTER,
+        )
+        zenbenchmark.record(
+            f"{key}_p90",
+            durations[key][len(durations[key]) * 9 // 10],
+            "ms",
+            report=MetricReport.LOWER_IS_BETTER,
+        )
+        zenbenchmark.record(
+            f"{key}_p99",
+            durations[key][len(durations[key]) * 99 // 100],
+            "ms",
+            report=MetricReport.LOWER_IS_BETTER,
+        )
diff --git a/test_runner/performance/test_lazy_startup.py b/test_runner/performance/test_lazy_startup.py
deleted file mode 100644
index 3bf3ef890f..0000000000
--- a/test_runner/performance/test_lazy_startup.py
+++ /dev/null
@@ -1,110 +0,0 @@
-from __future__ import annotations
-
-import pytest
-import requests
-from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
-from fixtures.neon_fixtures import NeonEnvBuilder
-
-
-# Start and measure duration with huge SLRU segments.
-# This test is similar to test_startup_simple, but it creates huge number of transactions
-# and records containing this XIDs. Autovacuum is disable for the table to prevent CLOG truncation.
-#
-# This test runs pretty quickly and can be informative when used in combination
-# with emulated network delay. Some useful delay commands:
-#
-# 1. Add 2msec delay to all localhost traffic
-# `sudo tc qdisc add dev lo root handle 1:0 netem delay 2msec`
-#
-# 2. Test that it works (you should see 4ms ping)
-# `ping localhost`
-#
-# 3. Revert back to normal
-# `sudo tc qdisc del dev lo root netem`
-#
-# NOTE this test might not represent the real startup time because the basebackup
-#      for a large database might be larger if there's a lof of transaction metadata,
-#      or safekeepers might need more syncing, or there might be more operations to
-#      apply during config step, like more users, databases, or extensions. By default
-#      we load extensions 'neon,pg_stat_statements,timescaledb,pg_cron', but in this
-#      test we only load neon.
-@pytest.mark.timeout(1800)
-@pytest.mark.parametrize("slru", ["lazy", "eager"])
-def test_lazy_startup(slru: str, neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
-    neon_env_builder.num_safekeepers = 3
-    env = neon_env_builder.init_start()
-
-    lazy_slru_download = "true" if slru == "lazy" else "false"
-    tenant, _ = env.create_tenant(
-        conf={
-            "lazy_slru_download": lazy_slru_download,
-        }
-    )
-
-    endpoint = env.endpoints.create_start("main", tenant_id=tenant)
-    with endpoint.cursor() as cur:
-        cur.execute("CREATE TABLE t (pk integer PRIMARY KEY, x integer)")
-        cur.execute("ALTER TABLE t SET (autovacuum_enabled = false)")
-        cur.execute("INSERT INTO t VALUES (1, 0)")
-        cur.execute(
-            """
-            CREATE PROCEDURE updating() as
-            $$
-                DECLARE
-                i integer;
-                BEGIN
-                FOR i IN 1..1000000 LOOP
-                    UPDATE t SET x = x + 1 WHERE pk=1;
-                    COMMIT;
-                END LOOP;
-                END
-            $$ LANGUAGE plpgsql
-            """
-        )
-        cur.execute("SET statement_timeout=0")
-        cur.execute("call updating()")
-
-    endpoint.stop()
-
-    # We do two iterations so we can see if the second startup is faster. It should
-    # be because the compute node should already be configured with roles, databases,
-    # extensions, etc from the first run.
-    for i in range(2):
-        # Start
-        with zenbenchmark.record_duration(f"{slru}_{i}_start"):
-            endpoint.start()
-
-        with zenbenchmark.record_duration(f"{slru}_{i}_select"):
-            sum = endpoint.safe_psql("select sum(x) from t")[0][0]
-            assert sum == 1000000
-
-        # Get metrics
-        metrics = requests.get(
-            f"http://localhost:{endpoint.external_http_port}/metrics.json"
-        ).json()
-        durations = {
-            "wait_for_spec_ms": f"{slru}_{i}_wait_for_spec",
-            "sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers",
-            "sync_sk_check_ms": f"{slru}_{i}_sync_sk_check",
-            "basebackup_ms": f"{slru}_{i}_basebackup",
-            "start_postgres_ms": f"{slru}_{i}_start_postgres",
-            "config_ms": f"{slru}_{i}_config",
-            "total_startup_ms": f"{slru}_{i}_total_startup",
-        }
-        for key, name in durations.items():
-            value = metrics[key]
-            zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)
-
-        basebackup_bytes = metrics["basebackup_bytes"]
-        zenbenchmark.record(
-            f"{slru}_{i}_basebackup_bytes",
-            basebackup_bytes,
-            "bytes",
-            report=MetricReport.LOWER_IS_BETTER,
-        )
-
-        # Stop so we can restart
-        endpoint.stop()
-
-        # Imitate optimizations that console would do for the second start
-        endpoint.respec(skip_pg_catalog_updates=True)
diff --git a/test_runner/performance/test_startup.py b/test_runner/performance/test_startup.py
deleted file mode 100644
index 60d8b5be30..0000000000
--- a/test_runner/performance/test_startup.py
+++ /dev/null
@@ -1,84 +0,0 @@
-from __future__ import annotations
-
-import requests
-from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
-from fixtures.neon_fixtures import NeonEnvBuilder
-
-
-# Just start and measure duration.
-#
-# This test runs pretty quickly and can be informative when used in combination
-# with emulated network delay. Some useful delay commands:
-#
-# 1. Add 2msec delay to all localhost traffic
-# `sudo tc qdisc add dev lo root handle 1:0 netem delay 2msec`
-#
-# 2. Test that it works (you should see 4ms ping)
-# `ping localhost`
-#
-# 3. Revert back to normal
-# `sudo tc qdisc del dev lo root netem`
-#
-# NOTE this test might not represent the real startup time because the basebackup
-#      for a large database might be larger if there's a lof of transaction metadata,
-#      or safekeepers might need more syncing, or there might be more operations to
-#      apply during config step, like more users, databases, or extensions. By default
-#      we load extensions 'neon,pg_stat_statements,timescaledb,pg_cron', but in this
-#      test we only load neon.
-def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
-    neon_env_builder.num_safekeepers = 3
-    env = neon_env_builder.init_start()
-
-    env.create_branch("test_startup")
-
-    endpoint = None
-
-    # We do two iterations so we can see if the second startup is faster. It should
-    # be because the compute node should already be configured with roles, databases,
-    # extensions, etc from the first run.
-    for i in range(2):
-        # Start
-        with zenbenchmark.record_duration(f"{i}_start_and_select"):
-            if endpoint:
-                endpoint.start()
-            else:
-                endpoint = env.endpoints.create_start(
-                    "test_startup",
-                    # Shared buffers need to be allocated during startup, so they
-                    # impact startup time. This is the default value we use for
-                    # 1CPU pods (maybe different for VMs).
-                    #
-                    # TODO extensions also contribute to shared memory allocation,
-                    #      and this test doesn't include all default extensions we
-                    #      load.
-                    config_lines=["shared_buffers=262144"],
-                )
-            endpoint.safe_psql("select 1;")
-
-        # Get metrics
-        metrics = requests.get(
-            f"http://localhost:{endpoint.external_http_port}/metrics.json"
-        ).json()
-        durations = {
-            "wait_for_spec_ms": f"{i}_wait_for_spec",
-            "sync_safekeepers_ms": f"{i}_sync_safekeepers",
-            "sync_sk_check_ms": f"{i}_sync_sk_check",
-            "basebackup_ms": f"{i}_basebackup",
-            "start_postgres_ms": f"{i}_start_postgres",
-            "config_ms": f"{i}_config",
-            "total_startup_ms": f"{i}_total_startup",
-        }
-        for key, name in durations.items():
-            value = metrics[key]
-            zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)
-
-        # Check basebackup size makes sense
-        basebackup_bytes = metrics["basebackup_bytes"]
-        if i > 0:
-            assert basebackup_bytes < 100 * 1024
-
-        # Stop so we can restart
-        endpoint.stop()
-
-        # Imitate optimizations that console would do for the second start
-        endpoint.respec(skip_pg_catalog_updates=True)