mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 21:42:56 +00:00
Download SLRU segments on demand (#6151)
## Problem See https://github.com/neondatabase/cloud/issues/8673 ## Summary of changes Download missed SLRU segments from page server ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. ## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --------- Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech> Co-authored-by: Heikki Linnakangas <heikki@neon.tech>
This commit is contained in:
committed by
GitHub
parent
2bfc831c60
commit
9a9d9beaee
@@ -3980,8 +3980,17 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, endpoint
|
||||
# list files we're going to compare
|
||||
assert endpoint.pgdata_dir
|
||||
pgdata_files = list_files_to_compare(Path(endpoint.pgdata_dir))
|
||||
|
||||
restored_files = list_files_to_compare(restored_dir_path)
|
||||
|
||||
if pgdata_files != restored_files:
|
||||
# filter pg_xact and multixact files which are downloaded on demand
|
||||
pgdata_files = [
|
||||
f
|
||||
for f in pgdata_files
|
||||
if not f.startswith("pg_xact") and not f.startswith("pg_multixact")
|
||||
]
|
||||
|
||||
# check that file sets are equal
|
||||
assert pgdata_files == restored_files
|
||||
|
||||
|
||||
111
test_runner/performance/test_lazy_startup.py
Normal file
111
test_runner/performance/test_lazy_startup.py
Normal file
@@ -0,0 +1,111 @@
|
||||
import pytest
|
||||
import requests
|
||||
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
# Start and measure duration with huge SLRU segments.
|
||||
# This test is similar to test_startup_simple, but it creates huge number of transactions
|
||||
# and records containing this XIDs. Autovacuum is disable for the table to prevent CLOG truncation.
|
||||
#
|
||||
# This test runs pretty quickly and can be informative when used in combination
|
||||
# with emulated network delay. Some useful delay commands:
|
||||
#
|
||||
# 1. Add 2msec delay to all localhost traffic
|
||||
# `sudo tc qdisc add dev lo root handle 1:0 netem delay 2msec`
|
||||
#
|
||||
# 2. Test that it works (you should see 4ms ping)
|
||||
# `ping localhost`
|
||||
#
|
||||
# 3. Revert back to normal
|
||||
# `sudo tc qdisc del dev lo root netem`
|
||||
#
|
||||
# NOTE this test might not represent the real startup time because the basebackup
|
||||
# for a large database might be larger if there's a lof of transaction metadata,
|
||||
# or safekeepers might need more syncing, or there might be more operations to
|
||||
# apply during config step, like more users, databases, or extensions. By default
|
||||
# we load extensions 'neon,pg_stat_statements,timescaledb,pg_cron', but in this
|
||||
# test we only load neon.
|
||||
@pytest.mark.timeout(1000)
|
||||
def test_lazy_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
lazy_tenant, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
"lazy_slru_download": "true",
|
||||
}
|
||||
)
|
||||
eager_tenant, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
"lazy_slru_download": "false",
|
||||
}
|
||||
)
|
||||
tenants = [lazy_tenant, eager_tenant]
|
||||
slru = "lazy"
|
||||
for tenant in tenants:
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant)
|
||||
endpoint.safe_psql("CREATE TABLE t (pk integer PRIMARY KEY, x integer)")
|
||||
endpoint.safe_psql("ALTER TABLE t SET (autovacuum_enabled = false)")
|
||||
endpoint.safe_psql("INSERT INTO t VALUES (1, 0)")
|
||||
endpoint.safe_psql(
|
||||
"""
|
||||
CREATE PROCEDURE updating() as
|
||||
$$
|
||||
DECLARE
|
||||
i integer;
|
||||
BEGIN
|
||||
FOR i IN 1..10000000 LOOP
|
||||
UPDATE t SET x = x + 1 WHERE pk=1;
|
||||
COMMIT;
|
||||
END LOOP;
|
||||
END
|
||||
$$ LANGUAGE plpgsql
|
||||
"""
|
||||
)
|
||||
endpoint.safe_psql("SET statement_timeout=0")
|
||||
endpoint.safe_psql("call updating()")
|
||||
|
||||
endpoint.stop()
|
||||
|
||||
# We do two iterations so we can see if the second startup is faster. It should
|
||||
# be because the compute node should already be configured with roles, databases,
|
||||
# extensions, etc from the first run.
|
||||
for i in range(2):
|
||||
# Start
|
||||
with zenbenchmark.record_duration(f"{slru}_{i}_start"):
|
||||
endpoint.start()
|
||||
|
||||
with zenbenchmark.record_duration(f"{slru}_{i}_select"):
|
||||
sum = endpoint.safe_psql("select sum(x) from t")[0][0]
|
||||
assert sum == 10000000
|
||||
|
||||
# Get metrics
|
||||
metrics = requests.get(f"http://localhost:{endpoint.http_port}/metrics.json").json()
|
||||
durations = {
|
||||
"wait_for_spec_ms": f"{slru}_{i}_wait_for_spec",
|
||||
"sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers",
|
||||
"sync_sk_check_ms": f"{slru}_{i}_sync_sk_check",
|
||||
"basebackup_ms": f"{slru}_{i}_basebackup",
|
||||
"start_postgres_ms": f"{slru}_{i}_start_postgres",
|
||||
"config_ms": f"{slru}_{i}_config",
|
||||
"total_startup_ms": f"{slru}_{i}_total_startup",
|
||||
}
|
||||
for key, name in durations.items():
|
||||
value = metrics[key]
|
||||
zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)
|
||||
|
||||
basebackup_bytes = metrics["basebackup_bytes"]
|
||||
zenbenchmark.record(
|
||||
f"{slru}_{i}_basebackup_bytes",
|
||||
basebackup_bytes,
|
||||
"bytes",
|
||||
report=MetricReport.LOWER_IS_BETTER,
|
||||
)
|
||||
|
||||
# Stop so we can restart
|
||||
endpoint.stop()
|
||||
|
||||
# Imitate optimizations that console would do for the second start
|
||||
endpoint.respec(skip_pg_catalog_updates=True)
|
||||
slru = "eager"
|
||||
@@ -173,6 +173,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
|
||||
"image_creation_threshold": 7,
|
||||
"pitr_interval": "1m",
|
||||
"lagging_wal_timeout": "23m",
|
||||
"lazy_slru_download": True,
|
||||
"max_lsn_wal_lag": 230000,
|
||||
"min_resident_size_override": 23,
|
||||
"trace_read_requests": True,
|
||||
|
||||
Reference in New Issue
Block a user