## Problem

See https://github.com/neondatabase/cloud/issues/8673

## Summary of changes

Download missed SLRU segments from the page server.

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
Co-authored-by: Heikki Linnakangas <heikki@neon.tech>
import pytest
import requests

from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.neon_fixtures import NeonEnvBuilder


# Start and measure duration with huge SLRU segments.
# This test is similar to test_startup_simple, but it creates a huge number of transactions
# and records containing these XIDs. Autovacuum is disabled for the table to prevent CLOG truncation.
#
# This test runs pretty quickly and can be informative when used in combination
# with emulated network delay. Some useful delay commands:
#
# 1. Add 2msec delay to all localhost traffic
# `sudo tc qdisc add dev lo root handle 1:0 netem delay 2msec`
#
# 2. Test that it works (you should see 4ms ping)
# `ping localhost`
#
# 3. Revert back to normal
# `sudo tc qdisc del dev lo root netem`
#
# NOTE this test might not represent the real startup time because the basebackup
# for a large database might be larger if there's a lot of transaction metadata,
# or safekeepers might need more syncing, or there might be more operations to
# apply during the config step, like more users, databases, or extensions. By default
# we load extensions 'neon,pg_stat_statements,timescaledb,pg_cron', but in this
# test we only load neon.
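#
# The lazy_slru_download tenant config flag is what this benchmark exercises. The
# assumed semantics (based on the flag name and the PR summary above, not asserted
# by the test): with it enabled, SLRU segments such as pg_xact/CLOG are fetched on
# demand from the pageserver instead of being included in the basebackup, which
# should shrink the basebackup and move some cost to the first queries after startup.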
@pytest.mark.timeout(1000)
def test_lazy_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

    lazy_tenant, _ = env.neon_cli.create_tenant(
        conf={
            "lazy_slru_download": "true",
        }
    )
    eager_tenant, _ = env.neon_cli.create_tenant(
        conf={
            "lazy_slru_download": "false",
        }
    )

    tenants = [lazy_tenant, eager_tenant]
    slru = "lazy"
    for tenant in tenants:
        endpoint = env.endpoints.create_start("main", tenant_id=tenant)
        endpoint.safe_psql("CREATE TABLE t (pk integer PRIMARY KEY, x integer)")
        endpoint.safe_psql("ALTER TABLE t SET (autovacuum_enabled = false)")
        endpoint.safe_psql("INSERT INTO t VALUES (1, 0)")
        endpoint.safe_psql(
            """
            CREATE PROCEDURE updating() as
            $$
            DECLARE
                i integer;
            BEGIN
                FOR i IN 1..10000000 LOOP
                    UPDATE t SET x = x + 1 WHERE pk=1;
                    COMMIT;
                END LOOP;
            END
            $$ LANGUAGE plpgsql
            """
        )
        endpoint.safe_psql("SET statement_timeout=0")
        endpoint.safe_psql("call updating()")

        endpoint.stop()

        # We do two iterations so we can see if the second startup is faster. It should
        # be because the compute node should already be configured with roles, databases,
        # extensions, etc. from the first run.
        for i in range(2):
            # Start
            with zenbenchmark.record_duration(f"{slru}_{i}_start"):
                endpoint.start()
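
            # For the lazy tenant, this full-table scan is where on-demand SLRU fetches
            # from the pageserver are expected to show up: visibility checks on the many
            # dead row versions need CLOG pages that were not shipped in the basebackup
            # (expected behavior, not asserted by the test).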
            with zenbenchmark.record_duration(f"{slru}_{i}_select"):
                sum = endpoint.safe_psql("select sum(x) from t")[0][0]
            assert sum == 10000000

            # Get metrics
            metrics = requests.get(f"http://localhost:{endpoint.http_port}/metrics.json").json()
            durations = {
                "wait_for_spec_ms": f"{slru}_{i}_wait_for_spec",
                "sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers",
                "sync_sk_check_ms": f"{slru}_{i}_sync_sk_check",
                "basebackup_ms": f"{slru}_{i}_basebackup",
                "start_postgres_ms": f"{slru}_{i}_start_postgres",
                "config_ms": f"{slru}_{i}_config",
                "total_startup_ms": f"{slru}_{i}_total_startup",
            }
            for key, name in durations.items():
                value = metrics[key]
                zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)
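
            # Expectation (not asserted here): the "lazy" tenant's basebackup should come
            # out noticeably smaller than the "eager" one's, since with lazy_slru_download
            # the SLRU segments are assumed to be left out of the basebackup.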
            basebackup_bytes = metrics["basebackup_bytes"]
            zenbenchmark.record(
                f"{slru}_{i}_basebackup_bytes",
                basebackup_bytes,
                "bytes",
                report=MetricReport.LOWER_IS_BETTER,
            )

            # Stop so we can restart
            endpoint.stop()

            # Imitate optimizations that the console would do for the second start
            endpoint.respec(skip_pg_catalog_updates=True)

        slru = "eager"
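

# To run just this benchmark locally, something like the following should work
# (assumed invocation; the exact test-runner wrapper, fixtures, and environment
# setup are project-specific and not described in this file):
#
#   pytest -k test_lazy_startup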