neon/test_runner/performance/test_compute_startup.py
John Spray d4c059a884 tests: use endpoint http wrapper to get auth (#11628)
## Problem

`test_compute_startup_simple` and `test_compute_ondemand_slru_startup`
are failing.

These tests implicitly assert that the metrics.json endpoint succeeds and
returns all expected metrics, but they don't make it easy to see what went
wrong when it doesn't (e.g. in this failure:
https://neon-github-public-dev.s3.amazonaws.com/reports/main/14513210240/index.html#suites/13d8e764c394daadbad415a08454c04e/b0f92a86b2ed309f/)

In this case, the request was failing because of a missing auth token: the
test was using `requests` directly instead of the endpoint HTTP client type.

## Summary of changes

- Use endpoint http wrapper to get raise_for_status & auth token
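
Roughly the shape of the change, as a sketch rather than the exact diff (the raw-requests version, its port variable, and the helper names are illustrative; the real call is `endpoint.http_client().metrics_json()`, visible in the test below):

```python
import requests


# Before (illustrative): a raw requests call. No auth header is attached and a
# non-2xx response is not surfaced, so the failure shows up later as missing metrics.
def get_metrics_raw(http_port: int) -> dict:
    return requests.get(f"http://localhost:{http_port}/metrics.json").json()


# After: go through the endpoint's HTTP client wrapper, which supplies the auth
# token and calls raise_for_status() before decoding the JSON body.
def get_metrics(endpoint) -> dict:
    return endpoint.http_client().metrics_json()
```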
2025-04-17 15:03:23 +00:00


from __future__ import annotations

from typing import TYPE_CHECKING

import pytest
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker

if TYPE_CHECKING:
    from fixtures.neon_fixtures import NeonEnvBuilder, PgBin


# Just start and measure duration.
#
# This test runs pretty quickly and can be informative when used in combination
# with emulated network delay. Some useful delay commands:
#
# 1. Add 2msec delay to all localhost traffic
# `sudo tc qdisc add dev lo root handle 1:0 netem delay 2msec`
#
# 2. Test that it works (you should see 4ms ping)
# `ping localhost`
#
# 3. Revert back to normal
# `sudo tc qdisc del dev lo root netem`
#
# NOTE this test might not represent the real startup time because the basebackup
# for a large database might be larger if there's a lot of transaction metadata,
# or safekeepers might need more syncing, or there might be more operations to
# apply during config step, like more users, databases, or extensions. By default
# we load extensions 'neon,pg_stat_statements,timescaledb,pg_cron', but in this
# test we only load neon.
def test_compute_startup_simple(
    neon_env_builder: NeonEnvBuilder,
    zenbenchmark: NeonBenchmarker,
):
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

    env.create_branch("test_startup")

    endpoint = None

    # We do two iterations so we can see if the second startup is faster. It should
    # be because the compute node should already be configured with roles, databases,
    # extensions, etc from the first run.
    for i in range(2):
        # Start
        with zenbenchmark.record_duration(f"{i}_start_and_select"):
            if endpoint:
                endpoint.start()
            else:
                endpoint = env.endpoints.create(
                    "test_startup",
                    # Shared buffers need to be allocated during startup, so they
                    # impact startup time. This is the default value we use for
                    # 1CPU pods (maybe different for VMs).
                    #
                    # TODO extensions also contribute to shared memory allocation,
                    # and this test doesn't include all default extensions we
                    # load.
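                    # (With the default 8 kB block size, 262144 buffers = 2 GiB.)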
                    config_lines=["shared_buffers=262144"],
                )
                # Do not skip pg_catalog updates at first start, i.e.
                # imitate 'the first start after project creation'.
                endpoint.respec(skip_pg_catalog_updates=False)
                endpoint.start()

            endpoint.safe_psql("select 1;")

        # Get metrics
        metrics = endpoint.http_client().metrics_json()

        durations = {
            "wait_for_spec_ms": f"{i}_wait_for_spec",
            "sync_safekeepers_ms": f"{i}_sync_safekeepers",
            "sync_sk_check_ms": f"{i}_sync_sk_check",
            "basebackup_ms": f"{i}_basebackup",
            "start_postgres_ms": f"{i}_start_postgres",
            "config_ms": f"{i}_config",
            "total_startup_ms": f"{i}_total_startup",
        }
        for key, name in durations.items():
            value = metrics[key]
            zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)

        # Check basebackup size makes sense
        basebackup_bytes = metrics["basebackup_bytes"]
        if i > 0:
            assert basebackup_bytes < 100 * 1024

        # Stop so we can restart
        endpoint.stop()

        # Imitate optimizations that console would do for the second start
        endpoint.respec(skip_pg_catalog_updates=True)


# Start and measure duration with huge SLRU segments.
#
# This test is similar to test_compute_startup_simple, but it creates a huge number of
# transactions and records containing these XIDs. Autovacuum is disabled for the table
# to prevent CLOG truncation.
#
# TODO: this is a very suspicious test, I doubt that it does what it's supposed to do,
# e.g. these two starts do not make much sense. Looks like it's just copy-paste.
# To be fixed within https://github.com/neondatabase/cloud/issues/8673
@pytest.mark.timeout(1800)
@pytest.mark.parametrize("slru", ["lazy", "eager"])
def test_compute_ondemand_slru_startup(
    slru: str, neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker
):
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

    lazy_slru_download = "true" if slru == "lazy" else "false"
    tenant, _ = env.create_tenant(
        conf={
            "lazy_slru_download": lazy_slru_download,
        }
    )

    endpoint = env.endpoints.create_start("main", tenant_id=tenant)
    with endpoint.cursor() as cur:
        cur.execute("CREATE TABLE t (pk integer PRIMARY KEY, x integer)")
        cur.execute("ALTER TABLE t SET (autovacuum_enabled = false)")
        cur.execute("INSERT INTO t VALUES (1, 0)")
        cur.execute(
            """
            CREATE PROCEDURE updating() as
            $$
            DECLARE
               i integer;
            BEGIN
               FOR i IN 1..1000000 LOOP
                   UPDATE t SET x = x + 1 WHERE pk=1;
                   COMMIT;
               END LOOP;
            END
            $$ LANGUAGE plpgsql
            """
        )
        cur.execute("SET statement_timeout=0")
        cur.execute("call updating()")

    endpoint.stop()

    # We do two iterations so we can see if the second startup is faster. It should
    # be because the compute node should already be configured with roles, databases,
    # extensions, etc from the first run.
    for i in range(2):
        # Start
        with zenbenchmark.record_duration(f"{slru}_{i}_start"):
            endpoint.start()

        with zenbenchmark.record_duration(f"{slru}_{i}_select"):
            sum = endpoint.safe_psql("select sum(x) from t")[0][0]
            assert sum == 1000000

        # Get metrics
        metrics = endpoint.http_client().metrics_json()

        durations = {
            "wait_for_spec_ms": f"{slru}_{i}_wait_for_spec",
            "sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers",
            "sync_sk_check_ms": f"{slru}_{i}_sync_sk_check",
            "basebackup_ms": f"{slru}_{i}_basebackup",
            "start_postgres_ms": f"{slru}_{i}_start_postgres",
            "config_ms": f"{slru}_{i}_config",
            "total_startup_ms": f"{slru}_{i}_total_startup",
        }
        for key, name in durations.items():
            value = metrics[key]
            zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)

        basebackup_bytes = metrics["basebackup_bytes"]
        zenbenchmark.record(
            f"{slru}_{i}_basebackup_bytes",
            basebackup_bytes,
            "bytes",
            report=MetricReport.LOWER_IS_BETTER,
        )

        # Stop so we can restart
        endpoint.stop()

        # Imitate optimizations that console would do for the second start
        endpoint.respec(skip_pg_catalog_updates=True)


@pytest.mark.timeout(240)
def test_compute_startup_latency(
    neon_env_builder: NeonEnvBuilder,
    pg_bin: PgBin,
    zenbenchmark: NeonBenchmarker,
):
    """
    Do NUM_STARTS 'optimized' starts, i.e. with pg_catalog updates skipped,
    and measure the duration of each step. Report p50, p90, p99 latencies.
    """
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

    endpoint = env.endpoints.create_start("main")
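    # Seed some data: pgbench -I dtGvp drops and recreates the pgbench tables,
    # generates data server-side, vacuums, and adds primary keys; -s4 is scale
    # factor 4 (~400k rows in pgbench_accounts).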
    pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s4", endpoint.connstr()])

    endpoint.stop()

    NUM_STARTS = 100

    durations: dict[str, list[int]] = {
        "sync_sk_check_ms": [],
        "sync_safekeepers_ms": [],
        "basebackup_ms": [],
        "start_postgres_ms": [],
        "total_startup_ms": [],
    }

    for _i in range(NUM_STARTS):
        endpoint.start()

        client = endpoint.http_client()
        metrics = client.metrics_json()
        for key in durations.keys():
            value = metrics[key]
            durations[key].append(value)

        endpoint.stop()
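    # Report index-based p50/p90/p99 over the sorted samples; with
    # NUM_STARTS == 100 these are the values at indices 50, 90 and 99.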
    for key in durations.keys():
        durations[key] = sorted(durations[key])
        zenbenchmark.record(
            f"{key}_p50",
            durations[key][len(durations[key]) // 2],
            "ms",
            report=MetricReport.LOWER_IS_BETTER,
        )
        zenbenchmark.record(
            f"{key}_p90",
            durations[key][len(durations[key]) * 9 // 10],
            "ms",
            report=MetricReport.LOWER_IS_BETTER,
        )
        zenbenchmark.record(
            f"{key}_p99",
            durations[key][len(durations[key]) * 99 // 100],
            "ms",
            report=MetricReport.LOWER_IS_BETTER,
        )