## Problem

`TYPE_CHECKING` is used inconsistently across Python tests.

## Summary of changes

- Update `ruff`: 0.7.0 -> 0.11.2
- Enable TC (flake8-type-checking): https://docs.astral.sh/ruff/rules/#flake8-type-checking-tc
- (auto)fix all new issues
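For illustration, the kind of change the TC (auto)fix produces looks roughly like the sketch below: imports needed only for type annotations move under `if TYPE_CHECKING:` so they are skipped at runtime. The test function name here is made up; the file below shows the same pattern with its real fixtures.

```python
from __future__ import annotations  # annotations stay strings, so deferred imports are fine

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Resolved only by type checkers; never imported at runtime.
    from fixtures.neon_fixtures import NeonEnvBuilder


def test_example(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.init_start()
```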
from __future__ import annotations

from typing import TYPE_CHECKING

import pytest
import requests
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker

if TYPE_CHECKING:
    from fixtures.neon_fixtures import NeonEnvBuilder, PgBin


# Just start and measure duration.
#
# This test runs pretty quickly and can be informative when used in combination
# with emulated network delay. Some useful delay commands:
#
# 1. Add 2msec delay to all localhost traffic
#    `sudo tc qdisc add dev lo root handle 1:0 netem delay 2msec`
#
# 2. Test that it works (you should see 4ms ping)
#    `ping localhost`
#
# 3. Revert back to normal
#    `sudo tc qdisc del dev lo root netem`
#
# NOTE this test might not represent the real startup time because the basebackup
# for a large database might be larger if there's a lot of transaction metadata,
# or safekeepers might need more syncing, or there might be more operations to
# apply during the config step, like more users, databases, or extensions. By default
# we load extensions 'neon,pg_stat_statements,timescaledb,pg_cron', but in this
# test we only load neon.
def test_compute_startup_simple(
    neon_env_builder: NeonEnvBuilder,
    zenbenchmark: NeonBenchmarker,
):
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

    env.create_branch("test_startup")

    endpoint = None

    # We do two iterations so we can see if the second startup is faster. It should
    # be because the compute node should already be configured with roles, databases,
    # extensions, etc. from the first run.
    for i in range(2):
        # Start
        with zenbenchmark.record_duration(f"{i}_start_and_select"):
            if endpoint:
                endpoint.start()
            else:
                endpoint = env.endpoints.create(
                    "test_startup",
                    # Shared buffers need to be allocated during startup, so they
                    # impact startup time. This is the default value we use for
                    # 1CPU pods (maybe different for VMs); with the standard 8 KiB
                    # page size, 262144 buffers is 2 GiB.
                    #
                    # TODO extensions also contribute to shared memory allocation,
                    # and this test doesn't include all default extensions we
                    # load.
                    config_lines=["shared_buffers=262144"],
                )
                # Do not skip pg_catalog updates at first start, i.e.
                # imitate 'the first start after project creation'.
                endpoint.respec(skip_pg_catalog_updates=False)
                endpoint.start()
            endpoint.safe_psql("select 1;")

        # Get metrics
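        # The compute serves a JSON breakdown of startup timings on its external
        # HTTP port; the mapping below renames those fields to benchmark metrics.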
        metrics = requests.get(
            f"http://localhost:{endpoint.external_http_port}/metrics.json"
        ).json()
        durations = {
            "wait_for_spec_ms": f"{i}_wait_for_spec",
            "sync_safekeepers_ms": f"{i}_sync_safekeepers",
            "sync_sk_check_ms": f"{i}_sync_sk_check",
            "basebackup_ms": f"{i}_basebackup",
            "start_postgres_ms": f"{i}_start_postgres",
            "config_ms": f"{i}_config",
            "total_startup_ms": f"{i}_total_startup",
        }
        for key, name in durations.items():
            value = metrics[key]
            zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)

        # Check basebackup size makes sense
        basebackup_bytes = metrics["basebackup_bytes"]
        if i > 0:
            assert basebackup_bytes < 100 * 1024

        # Stop so we can restart
        endpoint.stop()

        # Imitate optimizations that console would do for the second start
        endpoint.respec(skip_pg_catalog_updates=True)


# Start and measure duration with huge SLRU segments.
# This test is similar to test_compute_startup_simple, but it creates a huge number
# of transactions and records referencing these XIDs. Autovacuum is disabled for the
# table to prevent CLOG truncation.
# TODO: this is a very suspicious test; I doubt that it does what it's supposed to do,
# e.g. the two starts do not make much sense. Looks like it's just copy-paste.
# To be fixed within https://github.com/neondatabase/cloud/issues/8673
@pytest.mark.timeout(1800)
@pytest.mark.parametrize("slru", ["lazy", "eager"])
def test_compute_ondemand_slru_startup(
    slru: str, neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker
):
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

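    # 'lazy' defers downloading SLRU segments (e.g. pg_xact/CLOG) until they are
    # first needed, while 'eager' brings them in at startup; the benchmark compares
    # the startup cost of the two behaviours.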
    lazy_slru_download = "true" if slru == "lazy" else "false"
    tenant, _ = env.create_tenant(
        conf={
            "lazy_slru_download": lazy_slru_download,
        }
    )

    endpoint = env.endpoints.create_start("main", tenant_id=tenant)
    with endpoint.cursor() as cur:
        cur.execute("CREATE TABLE t (pk integer PRIMARY KEY, x integer)")
        cur.execute("ALTER TABLE t SET (autovacuum_enabled = false)")
        cur.execute("INSERT INTO t VALUES (1, 0)")
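        # Each loop iteration below commits, so the call burns ~1,000,000 XIDs and
        # grows the CLOG, producing the large SLRU state this test is about.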
        cur.execute(
            """
            CREATE PROCEDURE updating() as
            $$
            DECLARE
                i integer;
            BEGIN
                FOR i IN 1..1000000 LOOP
                    UPDATE t SET x = x + 1 WHERE pk=1;
                    COMMIT;
                END LOOP;
            END
            $$ LANGUAGE plpgsql
            """
        )
        cur.execute("SET statement_timeout=0")
        cur.execute("call updating()")

    endpoint.stop()

    # We do two iterations so we can see if the second startup is faster. It should
    # be because the compute node should already be configured with roles, databases,
    # extensions, etc. from the first run.
    for i in range(2):
        # Start
        with zenbenchmark.record_duration(f"{slru}_{i}_start"):
            endpoint.start()

        with zenbenchmark.record_duration(f"{slru}_{i}_select"):
            sum = endpoint.safe_psql("select sum(x) from t")[0][0]
            assert sum == 1000000

        # Get metrics
        metrics = requests.get(
            f"http://localhost:{endpoint.external_http_port}/metrics.json"
        ).json()
        durations = {
            "wait_for_spec_ms": f"{slru}_{i}_wait_for_spec",
            "sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers",
            "sync_sk_check_ms": f"{slru}_{i}_sync_sk_check",
            "basebackup_ms": f"{slru}_{i}_basebackup",
            "start_postgres_ms": f"{slru}_{i}_start_postgres",
            "config_ms": f"{slru}_{i}_config",
            "total_startup_ms": f"{slru}_{i}_total_startup",
        }
        for key, name in durations.items():
            value = metrics[key]
            zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)

        basebackup_bytes = metrics["basebackup_bytes"]
        zenbenchmark.record(
            f"{slru}_{i}_basebackup_bytes",
            basebackup_bytes,
            "bytes",
            report=MetricReport.LOWER_IS_BETTER,
        )

        # Stop so we can restart
        endpoint.stop()

        # Imitate optimizations that console would do for the second start
        endpoint.respec(skip_pg_catalog_updates=True)


@pytest.mark.timeout(240)
def test_compute_startup_latency(
    neon_env_builder: NeonEnvBuilder,
    pg_bin: PgBin,
    zenbenchmark: NeonBenchmarker,
):
    """
    Do NUM_STARTS 'optimized' starts, i.e. with pg_catalog updates skipped,
    and measure the duration of each step. Report p50, p90, p99 latencies.
    """
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

    endpoint = env.endpoints.create_start("main")
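    # pgbench custom init steps: d=drop tables, t=create tables, G=server-side
    # data generation, v=vacuum, p=create primary keys; -s4 sets scale factor 4.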
    pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s4", endpoint.connstr()])
    endpoint.stop()

    NUM_STARTS = 100

    durations: dict[str, list[int]] = {
        "sync_sk_check_ms": [],
        "sync_safekeepers_ms": [],
        "basebackup_ms": [],
        "start_postgres_ms": [],
        "total_startup_ms": [],
    }

    for _i in range(NUM_STARTS):
        endpoint.start()
        client = endpoint.http_client()
        metrics = client.metrics_json()
        for key in durations.keys():
            value = metrics[key]
            durations[key].append(value)
        endpoint.stop()

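    # Report nearest-rank p50/p90/p99 over the sorted samples for each step.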
    for key in durations.keys():
        durations[key] = sorted(durations[key])
        zenbenchmark.record(
            f"{key}_p50",
            durations[key][len(durations[key]) // 2],
            "ms",
            report=MetricReport.LOWER_IS_BETTER,
        )
        zenbenchmark.record(
            f"{key}_p90",
            durations[key][len(durations[key]) * 9 // 10],
            "ms",
            report=MetricReport.LOWER_IS_BETTER,
        )
        zenbenchmark.record(
            f"{key}_p99",
            durations[key][len(durations[key]) * 99 // 100],
            "ms",
            report=MetricReport.LOWER_IS_BETTER,
        )