neon/test_runner/regress/test_threshold_based_eviction.py
Christian Schwarz 0f20dae3c3 impr: merge pageserver_api::models::TenantConfig and pageserver::tenant::config::TenantConfOpt (#11298)
The only difference between
- `pageserver_api::models::TenantConfig` and
- `pageserver::tenant::config::TenantConfOpt`

at this point is that `TenantConfOpt` serializes with
`skip_serializing_if = Option::is_none`.
This is an efficiency improvement for every place that currently
serializes `models::TenantConfig`, because new serializations will no
longer write `$fieldname: null` for each field that is `None` at runtime
(see the sketch below).
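
For illustration, a minimal sketch of what the attribute changes, assuming `serde` and `serde_json` as dependencies; the struct and field names here are hypothetical stand-ins, not the actual `TenantConfig` definition:

```rust
use serde::Serialize;

// Hypothetical stand-ins for illustration; the real structs have many more fields.
#[derive(Serialize)]
struct OldStyle {
    compaction_period: Option<String>,
}

#[derive(Serialize)]
struct NewStyle {
    #[serde(skip_serializing_if = "Option::is_none")]
    compaction_period: Option<String>,
}

fn main() {
    // Before: every unset field is written as `$fieldname: null`.
    assert_eq!(
        serde_json::to_string(&OldStyle { compaction_period: None }).unwrap(),
        r#"{"compaction_period":null}"#
    );
    // After: unset fields are omitted entirely.
    assert_eq!(
        serde_json::to_string(&NewStyle { compaction_period: None }).unwrap(),
        "{}"
    );
}
```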

This should be particularly beneficial for Storcon, which stores
JSON-serialized `models::TenantConfig` in its DB.

# Behavior Changes


This PR changes the serialization behavior: we omit `None` fields
instead of serializing `$fieldname: null`.

So it's a data format change (see section on compatibility below).

And it changes API responses from Storcon and Pageserver.

## API Response Compatibility

Storcon returns the location description.
Afaik it is passed through into
- storcon_cli output
- storcon UI in console admin UI

These outputs will no longer contain `$fieldname: null` values,
which de-bloats the output (good).
But in the storcon UI, the `null` values also serve as editor "defaults";
those defaults will disappear once a storcon build containing this PR is released.


## Data Format Compatibility


Backwards compat: new software reading old serialized data will
deserialize to the same runtime value because all the field types
are exactly the same and `skip_serializing_if` does not affect
deserialization.

Forward compat: old software reading data serialized by new software
will map fields that are absent from the serialized form to the runtime
value `Option::None`. This is serde's default behavior; see this
playground to convince yourself, or the sketch below:

https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=f7f4e1a169959a3085b6158c022a05eb
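
To make both directions concrete, here is a minimal sketch along the lines of the playground, with a toy `TenantConfigLike` struct standing in for the real type: both the old format (explicit `null`) and the new format (field absent) deserialize to `None`.

```rust
use serde::Deserialize;

// Toy stand-in for the real config struct.
#[derive(Deserialize, Debug, PartialEq)]
struct TenantConfigLike {
    gc_horizon: Option<u64>,
}

fn main() {
    // Backwards compat: old serialized data with explicit nulls still deserializes.
    let old: TenantConfigLike = serde_json::from_str(r#"{"gc_horizon":null}"#).unwrap();
    // Forward compat: data without the field maps to None by serde's default behavior.
    let new: TenantConfigLike = serde_json::from_str("{}").unwrap();
    assert_eq!(old, TenantConfigLike { gc_horizon: None });
    assert_eq!(new, TenantConfigLike { gc_horizon: None });
}
```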

The `serde(with = "humantime_serde")` attribute, however, behaves
differently: when used on an `Option<Duration>`, it still requires the
field to be present, unlike the default serde behavior shown in the
previous paragraph.
The workaround is to set `serde(default)`.
Previously it was set on each individual field, but since the
container-level attribute exists, we set it there instead.
This requires deriving a `Default` impl, which, because all fields are
`Option`, involves no magic.
See my notes here:
https://gist.github.com/problame/eddbc225a5d12617e9f2c6413e0cf799
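
A sketch of the resulting pattern; the struct and field names are illustrative, not the actual definitions:

```rust
use std::time::Duration;

use serde::Deserialize;

// Illustrative only; the real struct has many more fields, all `Option`.
#[derive(Deserialize, Default, Debug)]
#[serde(default)] // container-level default: missing fields are taken from Default::default()
struct TenantConfigLike {
    // Without `serde(default)`, the `with = "humantime_serde"` attribute would
    // make this field required even though it is an Option.
    #[serde(with = "humantime_serde")]
    eviction_threshold: Option<Duration>,
}

fn main() {
    // The field may be absent ...
    let absent: TenantConfigLike = serde_json::from_str("{}").unwrap();
    assert!(absent.eviction_threshold.is_none());
    // ... or given in humantime format.
    let present: TenantConfigLike =
        serde_json::from_str(r#"{"eviction_threshold":"10s"}"#).unwrap();
    assert_eq!(present.eviction_threshold, Some(Duration::from_secs(10)));
}
```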

# Future Work

We should have separate types (& crates) for
- the runtime configuration types (e.g. `PageServerConf::tenant_config`,
`AttachedLocationConf`)
- the `config-v1` pageserver local-disk file format
- the mgmt API
- `pageserver.toml`

Right now they all use the same type, which is convenient but makes it
hard to reason about compatibility breakage.

# Refs

- corresponding docs.neon.build PR
https://github.com/neondatabase/docs/pull/470
2025-03-19 12:47:17 +00:00


from __future__ import annotations

import time
from dataclasses import dataclass
from typing import TYPE_CHECKING

from fixtures.log_helper import log
from fixtures.neon_fixtures import (
    NeonEnvBuilder,
    PgBin,
    last_flush_lsn_upload,
)
from fixtures.pageserver.http import LayerMapInfo
from fixtures.remote_storage import RemoteStorageKind
from pytest_httpserver import HTTPServer

if TYPE_CHECKING:
    from fixtures.httpserver import ListenAddress


# NB: basic config change tests are in test_tenant_conf.py


def test_threshold_based_eviction(
    httpserver: HTTPServer,
    httpserver_listen_address: ListenAddress,
    pg_bin: PgBin,
    neon_env_builder: NeonEnvBuilder,
):
    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)

    # Start with metrics collection enabled, so that the eviction task
    # imitates its accesses. We'll use a non-existent endpoint to make it fail.
    # The synthetic size calculation will run regardless.
    host, port = httpserver_listen_address
    neon_env_builder.pageserver_config_override = f"""
        metric_collection_interval="1s"
        synthetic_size_calculation_interval="2s"
        metric_collection_endpoint="http://{host}:{port}/nonexistent"
    """

    metrics_refused_log_line = (
        ".*metrics_collection:.* upload consumption_metrics (still failed|failed, will retry).*"
    )

    env = neon_env_builder.init_start()
    env.pageserver.allowed_errors.extend(
        [
            metrics_refused_log_line,
            # these can happen whenever we run consumption metrics collection
            r".*failed to calculate logical size at \S+: cancelled",
            r".*failed to calculate synthetic size for tenant \S+: failed to calculate some logical_sizes",
        ]
    )

    tenant_id, timeline_id = env.initial_tenant, env.initial_timeline

    ps_http = env.pageserver.http_client()
    vps_http = env.storage_controller.pageserver_api()

    # check config on pageserver, set via storcon; https://github.com/neondatabase/neon/issues/9621
    assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {}
    assert ps_http.tenant_config(tenant_id).effective_config["eviction_policy"] == {
        "kind": "NoEviction"
    }

    eviction_threshold = 10
    eviction_period = 2
    vps_http.set_tenant_config(
        tenant_id,
        {
            "eviction_policy": {
                "kind": "LayerAccessThreshold",
                "threshold": f"{eviction_threshold}s",
                "period": f"{eviction_period}s",
            },
        },
    )
    assert ps_http.tenant_config(tenant_id).effective_config["eviction_policy"] == {
        "kind": "LayerAccessThreshold",
        "threshold": f"{eviction_threshold}s",
        "period": f"{eviction_period}s",
    }

    # restart because changing tenant config is not instant
    env.pageserver.restart()
    assert ps_http.tenant_config(tenant_id).effective_config["eviction_policy"] == {
        "kind": "LayerAccessThreshold",
        "threshold": f"{eviction_threshold}s",
        "period": f"{eviction_period}s",
    }

    # create a bunch of L1s, only the least of which will need to be resident
    compaction_threshold = 3  # create L1 layers quickly
    vps_http.update_tenant_config(
        tenant_id,
        inserts={
            # Disable gc and compaction to avoid on-demand downloads from their side.
            # The only on-demand downloads should be from the eviction task's "imitate access" functions.
            "gc_period": "0s",
            "compaction_period": "0s",
            # low checkpoint_distance so that pgbench creates many layers
            "checkpoint_distance": 1024**2,
            # Low compaction target size to create many L1's with tight key ranges.
            # This is so that the "imitate access" doesn't download all the layers.
            "compaction_target_size": 1 * 1024**2,  # all keys into one L1
            # Turn L0's into L1's fast.
            "compaction_threshold": compaction_threshold,
            # Prevent compaction from collapsing L1 delta layers into image layers. We want many layers here.
            "image_creation_threshold": 100,
            # Much larger so that the synthetic size calculation worker, which is part of metric collection,
            # computes logical size for initdb_lsn every time, instead of some moving lsn as we insert data.
            # This makes the set of downloaded layers predictable,
            # thereby allowing the residence statuses to stabilize below.
            "gc_horizon": 1024**4,
        },
    )

    # create a bunch of layers
    with env.endpoints.create_start("main", tenant_id=tenant_id) as pg:
        pg_bin.run(["pgbench", "-i", "-I", "dtGvp", "-s", "3", pg.connstr()])
        last_flush_lsn_upload(env, pg, tenant_id, timeline_id)
    # wrap up and shut down safekeepers so that no more layers will be created after the final checkpoint
    for sk in env.safekeepers:
        sk.stop()
    ps_http.timeline_checkpoint(tenant_id, timeline_id)

    # wait for evictions and assert that they stabilize
    @dataclass
    class ByLocalAndRemote:
        remote_layers: set[str]
        local_layers: set[str]

    class MapInfoProjection:
        def __init__(self, info: LayerMapInfo):
            self.info = info

        def by_local_and_remote(self) -> ByLocalAndRemote:
            return ByLocalAndRemote(
                remote_layers={
                    layer.layer_file_name for layer in self.info.historic_layers if layer.remote
                },
                local_layers={
                    layer.layer_file_name for layer in self.info.historic_layers if not layer.remote
                },
            )

        def __eq__(self, other):
            if not isinstance(other, MapInfoProjection):
                return False
            return self.by_local_and_remote() == other.by_local_and_remote()

        def __repr__(self) -> str:
            out = ["MapInfoProjection:"]
            for layer in sorted(self.info.historic_layers, key=lambda layer: layer.layer_file_name):
                remote = "R" if layer.remote else "L"
                out += [f" {remote} {layer.layer_file_name}"]
            return "\n".join(out)

    stable_for: float = 0
    observation_window = 8 * eviction_threshold
    consider_stable_when_no_change_for_seconds = 3 * eviction_threshold
    poll_interval = eviction_threshold / 3

    started_waiting_at = time.time()
    map_info_changes: list[tuple[float, MapInfoProjection]] = []
    while time.time() - started_waiting_at < observation_window:
        current = (
            time.time(),
            MapInfoProjection(vps_http.layer_map_info(tenant_id, timeline_id)),
        )
        last = map_info_changes[-1] if map_info_changes else (0, None)
        if last[1] is None or current[1] != last[1]:
            map_info_changes.append(current)
            log.info("change in layer map\n before: %s\n after: %s", last, current)
        else:
            stable_for = current[0] - last[0]
            log.info("residencies stable for %s", stable_for)
            if stable_for > consider_stable_when_no_change_for_seconds:
                break
        time.sleep(poll_interval)

    log.info("len(map_info_changes)=%s", len(map_info_changes))

    # TODO: can we be more precise here? E.g., require we're stable _within_ X*threshold,
    # instead of what we do here, i.e., stable _for at least_ X*threshold toward the end of the observation window
    assert (
        stable_for > consider_stable_when_no_change_for_seconds
    ), "layer residencies did not become stable within the observation window"

    post = map_info_changes[-1][1].by_local_and_remote()
    assert len(post.remote_layers) > 0, "some layers should be evicted once it's stabilized"
    assert len(post.local_layers) > 0, "the imitate accesses should keep some layers resident"

    assert (
        env.pageserver.log_contains(metrics_refused_log_line) is not None
    ), "ensure the metrics collection worker ran"