mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-29 11:00:38 +00:00
pageserver: basebackup cache (hackathon project) (#11989)
## Problem Basebackup cache is on the hot path of compute startup and is generated on every request (may be slow). - Issue: https://github.com/neondatabase/cloud/issues/29353 ## Summary of changes - Add `BasebackupCache` which stores basebackups on local disk. - Basebackup prepare requests are triggered by `XLOG_CHECKPOINT_SHUTDOWN` records in the log. - Limit the size of the cache by number of entries. - Add `basebackup_cache_enabled` feature flag to TenantConfig. - Write tests for the cache ## Not implemented yet - Limit the size of the cache by total size in bytes --------- Co-authored-by: Aleksandr Sarantsev <aleksandr@neon.tech>
This commit is contained in:
77
test_runner/regress/test_basebackup.py
Normal file
77
test_runner/regress/test_basebackup.py
Normal file
@@ -0,0 +1,77 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from fixtures.utils import wait_until
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Simple test for basebackup cache.
|
||||
1. Check that we always hit the cache after compute restart.
|
||||
2. Check that we eventually delete old basebackup files, but not the latest one.
|
||||
3. Check that we delete basebackup file for timeline with active compute.
|
||||
"""
|
||||
|
||||
neon_env_builder.pageserver_config_override = """
|
||||
tenant_config = { basebackup_cache_enabled = true }
|
||||
basebackup_cache_config = { cleanup_period = '1s' }
|
||||
"""
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
ep = env.endpoints.create("main")
|
||||
ps = env.pageserver
|
||||
ps_http = ps.http_client()
|
||||
|
||||
# 1. Check that we always hit the cache after compute restart.
|
||||
for i in range(3):
|
||||
ep.start()
|
||||
ep.stop()
|
||||
|
||||
def check_metrics(i=i):
|
||||
metrics = ps_http.get_metrics()
|
||||
# Never miss.
|
||||
# The first time compute_ctl sends `get_basebackup` with lsn=None, we do not cache such requests.
|
||||
# All other requests should be a hit
|
||||
assert (
|
||||
metrics.query_one(
|
||||
"pageserver_basebackup_cache_read_total", {"result": "miss"}
|
||||
).value
|
||||
== 0
|
||||
)
|
||||
# All but the first requests are hits.
|
||||
assert (
|
||||
metrics.query_one("pageserver_basebackup_cache_read_total", {"result": "hit"}).value
|
||||
== i
|
||||
)
|
||||
# Every compute shut down should trigger a prepare reuest.
|
||||
assert (
|
||||
metrics.query_one(
|
||||
"pageserver_basebackup_cache_prepare_total", {"result": "ok"}
|
||||
).value
|
||||
== i + 1
|
||||
)
|
||||
|
||||
wait_until(check_metrics)
|
||||
|
||||
# 2. Check that we eventually delete old basebackup files, but not the latest one.
|
||||
def check_bb_file_count():
|
||||
bb_files = list(ps.workdir.joinpath("basebackup_cache").iterdir())
|
||||
# tmp dir + 1 basebackup file.
|
||||
assert len(bb_files) == 2
|
||||
|
||||
wait_until(check_bb_file_count)
|
||||
|
||||
# 3. Check that we delete basebackup file for timeline with active compute.
|
||||
ep.start()
|
||||
ep.safe_psql("create table t1 as select generate_series(1, 10) as n")
|
||||
|
||||
def check_bb_dir_empty():
|
||||
bb_files = list(ps.workdir.joinpath("basebackup_cache").iterdir())
|
||||
# only tmp dir.
|
||||
assert len(bb_files) == 1
|
||||
|
||||
wait_until(check_bb_dir_empty)
|
||||
Reference in New Issue
Block a user