## Problem

- `test_basebackup_cache` fails in https://github.com/neondatabase/neon/pull/11712 because once timelines on safekeepers are managed by the storage controller, they contain a proper start_lsn, and compute_ctl sends the first basebackup request with this LSN.
- `Failed to prepare basebackup` log messages appear during timeline initialization, because the timeline is not yet in the global timeline map.
- Relates to https://github.com/neondatabase/cloud/issues/29353

## Summary of changes

- Account for the storage controller's `timelines_onto_safekeepers` option in the test.
- Do not trigger basebackup prepare during timeline initialization.
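To make the counter arithmetic in the test below concrete, here is a small standalone sketch (illustrative only; `expected_counts` is not part of the test, it just mirrors the assertions). After `i + 1` compute start/stop cycles, the first basebackup read is a miss when compute_ctl knows the start LSN, or bypasses the cache entirely when it sends `lsn=None`; the remaining `i` reads are hits, and every shutdown triggers one prepare request.

```python
def expected_counts(i: int, storcon_managed_timelines: bool) -> dict[str, int]:
    """Expected basebackup cache counters after i + 1 compute start/stop cycles."""
    return {
        # First startup: a miss if compute_ctl knows the timeline's start LSN
        # (storcon-managed timelines); otherwise the request has lsn=None and
        # bypasses the cache, counting as neither a hit nor a miss.
        "miss": 1 if storcon_managed_timelines else 0,
        # Every restart after the first is served from the cache.
        "hit": i,
        # Each compute shutdown triggers one basebackup prepare request.
        "prepare_ok": i + 1,
    }

assert expected_counts(2, True) == {"miss": 1, "hit": 2, "prepare_ok": 3}
```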
from __future__ import annotations

from typing import TYPE_CHECKING

from fixtures.utils import wait_until

if TYPE_CHECKING:
    from fixtures.neon_fixtures import NeonEnvBuilder


def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
    """
    Simple test for the basebackup cache.
    1. Check that we always hit the cache after compute restart.
    2. Check that we eventually delete old basebackup files, but not the latest one.
    3. Check that we delete the basebackup file for a timeline with an active compute.
    """

    neon_env_builder.pageserver_config_override = """
tenant_config = { basebackup_cache_enabled = true }
basebackup_cache_config = { cleanup_period = '1s' }
"""

    env = neon_env_builder.init_start()
    ep = env.endpoints.create("main")
    ps = env.pageserver
    ps_http = ps.http_client()

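    # When the storage controller manages timelines on safekeepers, the timeline has
    # a proper start_lsn and compute_ctl sends the first basebackup request at that
    # LSN, which changes the expected hit/miss metrics below.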
    storcon_managed_timelines = (env.storage_controller_config or {}).get(
        "timelines_onto_safekeepers", False
    )

    # 1. Check that we always hit the cache after compute restart.
    for i in range(3):
        ep.start()
        ep.stop()

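        # `i=i` binds the current loop value as a default argument, so the closure
        # retried by wait_until checks this iteration's expected counts.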
        def check_metrics(i=i):
            metrics = ps_http.get_metrics()
            if storcon_managed_timelines:
                # We do not cache the initial basebackup yet,
                # so the first compute startup should be a miss.
                assert (
                    metrics.query_one(
                        "pageserver_basebackup_cache_read_total", {"result": "miss"}
                    ).value
                    == 1
                )
            else:
                # If the timeline is not initialized on safekeepers,
                # compute_ctl sends `get_basebackup` with lsn=None for the first startup.
                # We do not use the cache for such requests, so it's neither a hit nor a miss.
                assert (
                    metrics.query_one(
                        "pageserver_basebackup_cache_read_total", {"result": "miss"}
                    ).value
                    == 0
                )

            # All requests but the first are hits.
            assert (
                metrics.query_one("pageserver_basebackup_cache_read_total", {"result": "hit"}).value
                == i
            )
            # Every compute shutdown should trigger a prepare request.
            assert (
                metrics.query_one(
                    "pageserver_basebackup_cache_prepare_total", {"result": "ok"}
                ).value
                == i + 1
            )

        wait_until(check_metrics)

    # 2. Check that we eventually delete old basebackup files, but not the latest one.
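    # The cleanup task runs every cleanup_period ('1s' in the config above),
    # so stale files should disappear quickly.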
    def check_bb_file_count():
        bb_files = list(ps.workdir.joinpath("basebackup_cache").iterdir())
        # tmp dir + 1 basebackup file.
        assert len(bb_files) == 2

    wait_until(check_bb_file_count)

    # 3. Check that we delete the basebackup file for a timeline with an active compute.
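    # Writing new data advances the timeline past the LSN of the cached
    # basebackup, so the now-stale entry should be removed.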
    ep.start()
    ep.safe_psql("create table t1 as select generate_series(1, 10) as n")

    def check_bb_dir_empty():
        bb_files = list(ps.workdir.joinpath("basebackup_cache").iterdir())
        # only tmp dir.
        assert len(bb_files) == 1

    wait_until(check_bb_dir_empty)