mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-16 04:30:38 +00:00
341 lines
13 KiB
Python
341 lines
13 KiB
Python
import time
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Dict, Iterator, Tuple
|
|
|
|
import pytest
|
|
from fixtures.log_helper import log
|
|
from fixtures.neon_fixtures import (
|
|
LayerMapInfo,
|
|
NeonEnv,
|
|
NeonEnvBuilder,
|
|
PageserverHttpClient,
|
|
PgBin,
|
|
RemoteStorageKind,
|
|
wait_for_last_flush_lsn,
|
|
)
|
|
from fixtures.types import TenantId, TimelineId
|
|
|
|
|
|
@pytest.mark.parametrize("config_level_override", [None, 400])
def test_min_resident_size_override_handling(
    neon_env_builder: NeonEnvBuilder, config_level_override: int
):
    """
    Check how `min_resident_size_override` shows up in the tenant config.

    The pageserver is restarted, optionally with a pageserver-level
    `tenant_config` default (`config_level_override`, `None` meaning no
    default is passed). Tenant-level overrides are then set and cleared, both
    over the HTTP API and via `create_tenant(conf=...)`, and after each change
    the tenant-specific and the effective values are compared with what is
    expected.
    """
    key = "min_resident_size_override"

    env = neon_env_builder.init_start()
    ps_http = env.pageserver.http_client()

    def assert_config(tenant_id, expect_override, expect_effective):
        # Compare both views of the setting reported for this tenant.
        reported = ps_http.tenant_config(tenant_id)
        assert reported.tenant_specific_overrides.get(key) == expect_override
        assert reported.effective_config.get(key) == expect_effective

    def assert_overrides(tenant_id, default_tenant_conf_value):
        # An explicit override (including 0) is reported as-is in both views.
        for value in (200, 0):
            ps_http.set_tenant_config(tenant_id, {key: value})
            assert_config(tenant_id, value, value)

        # Clearing the tenant config leaves only the pageserver-level default.
        ps_http.set_tenant_config(tenant_id, {})
        assert_config(tenant_id, None, default_tenant_conf_value)

    # Restart the pageserver so that the config-level default (if any) applies.
    env.pageserver.stop()
    if config_level_override is None:
        env.pageserver.start()
    else:
        override_arg = (
            "--pageserver-config-override=tenant_config="
            f"{{ min_resident_size_override = {config_level_override} }}"
        )
        env.pageserver.start(overrides=(override_arg,))

    tenant_id, _ = env.neon_cli.create_tenant()
    assert_overrides(tenant_id, config_level_override)

    # Also ensure that specifying the parameter to create_tenant works, in addition to
    # http-level reconfiguration.
    tenant_id, _ = env.neon_cli.create_tenant(conf={key: "100"})
    assert_config(tenant_id, 100, 100)
    ps_http.set_tenant_config(tenant_id, {})
    assert_config(tenant_id, None, config_level_override)
|
|
|
|
|
|
@dataclass
class EvictionEnv:
    """
    Everything the disk usage eviction tests need, as built by the `eviction_env` fixture.
    """

    # One (tenant, timeline, layer map info) entry per tenant created by the fixture.
    timelines: list[Tuple[TenantId, TimelineId, LayerMapInfo]]
    neon_env: NeonEnv
    pg_bin: PgBin
    pageserver_http: PageserverHttpClient
    # The size (in bytes) the fixture used for checkpoint_distance and compaction_target_size.
    layer_size: int

    def timelines_du(self) -> Tuple[int, int, int]:
        """
        Run `poor_mans_du` over all timelines of this environment at once.
        """
        pairs = [(tenant_id, timeline_id) for tenant_id, timeline_id, _layers in self.timelines]
        return poor_mans_du(self.neon_env, pairs)

    def du_by_timeline(self) -> Dict[Tuple[TenantId, TimelineId], int]:
        """
        Map each (tenant, timeline) pair to its total on-disk layer size.
        """
        usage: Dict[Tuple[TenantId, TimelineId], int] = {}
        for tenant_id, timeline_id, _layers in self.timelines:
            pair = (tenant_id, timeline_id)
            # poor_mans_du returns (total, largest, smallest); only the total is kept.
            usage[pair] = poor_mans_du(self.neon_env, [pair])[0]
        return usage
|
|
|
|
|
|
@pytest.fixture
def eviction_env(request, neon_env_builder: NeonEnvBuilder, pg_bin: PgBin) -> Iterator[EvictionEnv]:
    """
    Creates two tenants, one somewhat larger than the other.

    The initial tenant is put into the Broken state before the test runs. Once
    the test is done, the fixture asserts that the on-disk usage of that broken
    tenant did not change.
    """
    test_name = request.node.name
    log.info(f"setting up eviction_env for test {test_name}")

    neon_env_builder.enable_remote_storage(RemoteStorageKind.LOCAL_FS, str(test_name))

    env = neon_env_builder.init_start()
    pageserver_http = env.pageserver.http_client()

    # allow because we are invoking this manually; we always warn on executing disk based eviction
    env.pageserver.allowed_errors.append(r".* running disk usage based eviction due to pressure.*")
    env.pageserver.allowed_errors.append(
        r".* Changing Active tenant to Broken state, reason: broken from test"
    )

    # break the difficult to use initial default tenant, later assert that it has not been evicted
    broken_tenant_id = env.initial_tenant
    broken_timeline_id = env.initial_timeline
    assert broken_timeline_id is not None
    pageserver_http.tenant_break(env.initial_tenant)
    broken_on_disk_before = poor_mans_du(
        env, timelines=[(broken_tenant_id, broken_timeline_id)]
    )[0]
    env.pageserver.allowed_errors.append(
        f".*extend_lru_candidates.*Tenant {broken_tenant_id} is not active. Current state: Broken"
    )

    # Choose small layer_size so that we can use low pgbench_scales and still get a large count
    # of layers. Large count of layers and small layer size is good for testing because it makes
    # evictions predictable. Predictable in the sense that many layer evictions will be required
    # to reach the eviction target, because each eviction only makes small progress. That means
    # little overshoot, and thereby stable asserts.
    layer_size = 5 * 1024**2
    layer_size_str = str(layer_size)

    timelines = []
    for scale in (4, 6):
        tenant_conf = {
            "gc_period": "0s",
            "compaction_period": "0s",
            "checkpoint_distance": layer_size_str,
            "image_creation_threshold": "100",
            "compaction_target_size": layer_size_str,
        }
        tenant_id, timeline_id = env.neon_cli.create_tenant(conf=tenant_conf)

        # Fill the tenant with pgbench data and wait until the pageserver has received it.
        with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
            pg_bin.run(["pgbench", "-i", f"-s{scale}", pg.connstr()])
            wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)

        pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
        layers = pageserver_http.layer_map_info(tenant_id, timeline_id)
        log.info(f"{layers}")
        assert len(layers.historic_layers) >= 4

        timelines.append((tenant_id, timeline_id, layers))

    eviction_env = EvictionEnv(
        timelines=timelines,
        neon_env=env,
        pageserver_http=pageserver_http,
        layer_size=layer_size,
        pg_bin=pg_bin,
    )

    yield eviction_env

    # Teardown: the broken tenant must not have been touched by the test.
    broken_on_disk_after = poor_mans_du(
        eviction_env.neon_env, [(broken_tenant_id, broken_timeline_id)]
    )[0]

    assert (
        broken_on_disk_before == broken_on_disk_after
    ), "only touch active tenants with disk_usage_eviction"
|
|
|
|
|
|
def test_pageserver_evicts_until_pressure_is_relieved(eviction_env: EvictionEnv):
    """
    Basic test to ensure that we evict enough to relieve pressure.

    Requests eviction of half of the total on-disk usage and checks that at
    least that much disappeared from disk without any reported failures.
    """
    http = eviction_env.pageserver_http

    usage_before = eviction_env.timelines_du()[0]
    target = usage_before // 2

    response = http.disk_usage_eviction_run({"evict_bytes": target})
    log.info(f"{response}")

    usage_after = eviction_env.timelines_du()[0]
    actual_change = usage_before - usage_after

    assert actual_change >= 0, "nothing can load layers during this test"
    assert actual_change >= target, "must evict more than half"

    # The interesting part of the response lives under Finished -> assumed.
    assumed = response["Finished"]["assumed"]
    assert (
        assumed["projected_after"]["freed_bytes"] >= actual_change
    ), "report accurately evicted bytes"
    assert assumed["failed"]["count"] == 0, "zero failures expected"
|
|
|
|
|
|
def test_pageserver_respects_overridden_resident_size(eviction_env: EvictionEnv):
    """
    Override tenant min resident and ensure that it will be respected by eviction.

    Both tenants get a `min_resident_size_override` equal to the smaller
    tenant's on-disk usage, and the eviction target is the size difference
    between the two tenants. Afterwards the smaller tenant's usage must be
    unchanged and the larger one must have shrunk by at least the target.
    """
    ps_http = eviction_env.pageserver_http

    usage_before = eviction_env.timelines_du()[0]
    du_by_timeline = eviction_env.du_by_timeline()
    log.info("du_by_timeline: %s", du_by_timeline)

    assert len(du_by_timeline) == 2, "this test assumes two tenants"
    large_tenant = max(du_by_timeline, key=du_by_timeline.get)
    small_tenant = min(du_by_timeline, key=du_by_timeline.get)
    large_before = du_by_timeline[large_tenant]
    small_before = du_by_timeline[small_tenant]
    assert large_before > small_before
    assert (
        large_before - small_before > 5 * eviction_env.layer_size
    ), "ensure this test will do more than 1 eviction"

    # give the larger tenant a haircut while preventing the smaller tenant from getting one
    min_resident_size = small_before
    target = large_before - small_before
    assert any(
        du > min_resident_size for du in du_by_timeline.values()
    ), "ensure the larger tenant will get a haircut"

    # Apply the same minimum to both tenants, smaller one first.
    for tenant_id, _timeline_id in (small_tenant, large_tenant):
        ps_http.set_tenant_config(tenant_id, {"min_resident_size_override": min_resident_size})

    # do one run
    response = ps_http.disk_usage_eviction_run({"evict_bytes": target})
    log.info(f"{response}")

    time.sleep(1)  # give log time to flush
    assert not eviction_env.neon_env.pageserver.log_contains(
        "falling back to global LRU"
    ), "this test is pointless if it fell back to global LRU"

    usage_after = eviction_env.timelines_du()[0]
    later_du_by_timeline = eviction_env.du_by_timeline()
    log.info("later_du_by_timeline: %s", later_du_by_timeline)

    actual_change = usage_before - usage_after
    assert actual_change >= 0, "nothing can load layers during this test"
    assert actual_change >= target, "eviction must always evict more than target"

    assumed = response["Finished"]["assumed"]
    assert (
        assumed["projected_after"]["freed_bytes"] >= actual_change
    ), "report accurately evicted bytes"
    assert assumed["failed"]["count"] == 0, "zero failures expected"

    assert later_du_by_timeline[small_tenant] == small_before, "small tenant sees no haircut"
    assert later_du_by_timeline[large_tenant] < large_before, "large tenant gets a haircut"
    assert large_before - later_du_by_timeline[large_tenant] >= target
|
|
|
|
|
|
def test_pageserver_falls_back_to_global_lru(eviction_env: EvictionEnv):
    """
    The pageserver should fall back to global LRU if the tenant_min_resident_size-respecting eviction
    wouldn't evict enough.

    The eviction target here is the complete on-disk usage of all timelines.
    """
    pageserver = eviction_env.neon_env.pageserver

    usage_before = eviction_env.timelines_du()[0]
    target = usage_before

    response = eviction_env.pageserver_http.disk_usage_eviction_run({"evict_bytes": target})
    log.info(f"{response}")

    usage_after = eviction_env.timelines_du()[0]
    actual_change = usage_before - usage_after
    assert actual_change >= 0, "nothing can load layers during this test"
    assert actual_change >= target, "eviction must always evict more than target"

    time.sleep(1)  # give log time to flush
    assert pageserver.log_contains("falling back to global LRU")
    # The fallback message is expected in this test, so allow it in the pageserver log.
    pageserver.allowed_errors.append(".*falling back to global LRU")
|
|
|
|
|
|
def test_partial_evict_tenant(eviction_env: EvictionEnv):
    """
    Evict everything except about half of one recently used tenant.

    One tenant is made more recently used than the other by running a
    select-only pgbench against it. The eviction target is the total on-disk
    usage minus half of that tenant's usage; afterwards every tenant must have
    lost layers, the warmed up tenant must keep more than 40% of its usage and
    the other tenant must be left with less than two layers' worth of data.
    """
    ps_http = eviction_env.pageserver_http

    usage_before = eviction_env.timelines_du()[0]
    du_by_timeline = eviction_env.du_by_timeline()

    # pick any tenant
    our_tenant, other_tenant = du_by_timeline.keys()
    tenant_id, _timeline_id = our_tenant
    tenant_usage = du_by_timeline[our_tenant]

    # make our tenant more recently used than the other one
    with eviction_env.neon_env.postgres.create_start("main", tenant_id=tenant_id) as pg:
        eviction_env.pg_bin.run(["pgbench", "-S", pg.connstr()])

    target = usage_before - (tenant_usage // 2)
    response = ps_http.disk_usage_eviction_run({"evict_bytes": target})
    log.info(f"{response}")

    usage_after = eviction_env.timelines_du()[0]
    actual_change = usage_before - usage_after
    assert actual_change >= 0, "nothing can load layers during this test"
    assert actual_change >= target, "eviction must always evict more than target"

    later_du_by_timeline = eviction_env.du_by_timeline()
    for timeline_key, usage_now in later_du_by_timeline.items():
        assert usage_now < du_by_timeline[timeline_key], "all tenants should have lost some layers"

    assert (
        later_du_by_timeline[our_tenant] > 0.4 * tenant_usage
    ), "our warmed up tenant should be at about half capacity"
    assert (
        later_du_by_timeline[other_tenant] < 2 * eviction_env.layer_size
    ), "the other tenant should be completely evicted"
|
|
|
|
|
|
def poor_mans_du(
    env: NeonEnv, timelines: list[Tuple[TenantId, TimelineId]]
) -> Tuple[int, int, int]:
    """
    Disk usage, largest, smallest layer for layer files over the given (tenant, timeline) tuples;
    this could be done over layers endpoint just as well.

    Only directory entries whose file name contains "__" are counted; all
    other entries of a timeline directory are skipped.

    Returns a `(total_on_disk, largest_layer, smallest_layer)` tuple of sizes
    in bytes. All three are 0 when no matching file was found. Asserts that
    every timeline directory exists.
    """
    total_on_disk = 0
    largest_layer = 0
    smallest_layer = None
    for tenant_id, timeline_id in timelines:
        timeline_dir = (
            Path(env.repo_dir) / "tenants" / str(tenant_id) / "timelines" / str(timeline_id)
        )
        assert timeline_dir.exists(), f"timeline dir does not exist: {timeline_dir}"
        timeline_total = 0
        for file in timeline_dir.iterdir():
            if "__" not in file.name:
                continue
            size = file.stat().st_size
            timeline_total += size
            largest_layer = max(largest_layer, size)
            # Compare against None explicitly: a truthiness check would treat an
            # already recorded smallest size of 0 as "not set yet" and overwrite it.
            if smallest_layer is None:
                smallest_layer = size
            else:
                smallest_layer = min(smallest_layer, size)
            log.info(f"{tenant_id}/{timeline_id} => {file.name} {size}")

        log.info(f"{tenant_id}/{timeline_id}: sum {timeline_total}")
        total_on_disk += timeline_total

    # Either at least one file was seen, or every counter is still at its initial value.
    assert smallest_layer is not None or (total_on_disk == 0 and largest_layer == 0)
    return (total_on_disk, largest_layer, smallest_layer or 0)
|