mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 13:32:57 +00:00
pageserver: remove absolute-order disk usage eviction (#8454)
## Problem
Deployed pageserver configurations are all like this:
```
disk_usage_based_eviction:
max_usage_pct: 85
min_avail_bytes: 0
period: "10s"
eviction_order:
type: "RelativeAccessed"
args:
highest_layer_count_loses_first: true
```
But we're maintaining this optional absolute order eviction, with test
cases etc.
## Summary of changes
- Remove absolute order eviction. Make the default eviction policy the
same as how we really deploy pageservers.
This commit is contained in:
@@ -1601,7 +1601,7 @@ threshold = "20m"
|
||||
period: Duration::from_secs(10),
|
||||
#[cfg(feature = "testing")]
|
||||
mock_statvfs: None,
|
||||
eviction_order: crate::disk_usage_eviction_task::EvictionOrder::AbsoluteAccessed,
|
||||
eviction_order: Default::default(),
|
||||
})
|
||||
);
|
||||
|
||||
|
||||
@@ -83,17 +83,9 @@ pub struct DiskUsageEvictionTaskConfig {
|
||||
|
||||
/// Selects the sort order for eviction candidates *after* per tenant `min_resident_size`
|
||||
/// partitioning.
|
||||
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(tag = "type", content = "args")]
|
||||
pub enum EvictionOrder {
|
||||
/// Order the layers to be evicted by how recently they have been accessed in absolute
|
||||
/// time.
|
||||
///
|
||||
/// This strategy is unfair when some tenants grow faster than others towards the slower
|
||||
/// growing.
|
||||
#[default]
|
||||
AbsoluteAccessed,
|
||||
|
||||
/// Order the layers to be evicted by how recently they have been accessed relatively within
|
||||
/// the set of resident layers of a tenant.
|
||||
RelativeAccessed {
|
||||
@@ -108,6 +100,14 @@ pub enum EvictionOrder {
|
||||
},
|
||||
}
|
||||
|
||||
impl Default for EvictionOrder {
|
||||
fn default() -> Self {
|
||||
Self::RelativeAccessed {
|
||||
highest_layer_count_loses_first: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn default_highest_layer_count_loses_first() -> bool {
|
||||
true
|
||||
}
|
||||
@@ -117,11 +117,6 @@ impl EvictionOrder {
|
||||
use EvictionOrder::*;
|
||||
|
||||
match self {
|
||||
AbsoluteAccessed => {
|
||||
candidates.sort_unstable_by_key(|(partition, candidate)| {
|
||||
(*partition, candidate.last_activity_ts)
|
||||
});
|
||||
}
|
||||
RelativeAccessed { .. } => candidates.sort_unstable_by_key(|(partition, candidate)| {
|
||||
(*partition, candidate.relative_last_activity)
|
||||
}),
|
||||
@@ -134,7 +129,6 @@ impl EvictionOrder {
|
||||
use EvictionOrder::*;
|
||||
|
||||
match self {
|
||||
AbsoluteAccessed => finite_f32::FiniteF32::ZERO,
|
||||
RelativeAccessed {
|
||||
highest_layer_count_loses_first,
|
||||
} => {
|
||||
|
||||
@@ -67,14 +67,11 @@ def test_min_resident_size_override_handling(
|
||||
|
||||
@enum.unique
|
||||
class EvictionOrder(str, enum.Enum):
|
||||
ABSOLUTE_ORDER = "absolute"
|
||||
RELATIVE_ORDER_EQUAL = "relative_equal"
|
||||
RELATIVE_ORDER_SPARE = "relative_spare"
|
||||
|
||||
def config(self) -> Dict[str, Any]:
|
||||
if self == EvictionOrder.ABSOLUTE_ORDER:
|
||||
return {"type": "AbsoluteAccessed"}
|
||||
elif self == EvictionOrder.RELATIVE_ORDER_EQUAL:
|
||||
if self == EvictionOrder.RELATIVE_ORDER_EQUAL:
|
||||
return {
|
||||
"type": "RelativeAccessed",
|
||||
"args": {"highest_layer_count_loses_first": False},
|
||||
@@ -384,7 +381,7 @@ def test_broken_tenants_are_skipped(eviction_env: EvictionEnv):
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"order",
|
||||
[EvictionOrder.ABSOLUTE_ORDER, EvictionOrder.RELATIVE_ORDER_EQUAL],
|
||||
[EvictionOrder.RELATIVE_ORDER_EQUAL],
|
||||
)
|
||||
def test_pageserver_evicts_until_pressure_is_relieved(
|
||||
eviction_env: EvictionEnv, order: EvictionOrder
|
||||
@@ -418,7 +415,7 @@ def test_pageserver_evicts_until_pressure_is_relieved(
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"order",
|
||||
[EvictionOrder.ABSOLUTE_ORDER, EvictionOrder.RELATIVE_ORDER_EQUAL],
|
||||
[EvictionOrder.RELATIVE_ORDER_EQUAL],
|
||||
)
|
||||
def test_pageserver_respects_overridden_resident_size(
|
||||
eviction_env: EvictionEnv, order: EvictionOrder
|
||||
@@ -495,7 +492,7 @@ def test_pageserver_respects_overridden_resident_size(
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"order",
|
||||
[EvictionOrder.ABSOLUTE_ORDER, EvictionOrder.RELATIVE_ORDER_EQUAL],
|
||||
[EvictionOrder.RELATIVE_ORDER_EQUAL],
|
||||
)
|
||||
def test_pageserver_falls_back_to_global_lru(eviction_env: EvictionEnv, order: EvictionOrder):
|
||||
"""
|
||||
@@ -526,7 +523,6 @@ def test_pageserver_falls_back_to_global_lru(eviction_env: EvictionEnv, order: E
|
||||
@pytest.mark.parametrize(
|
||||
"order",
|
||||
[
|
||||
EvictionOrder.ABSOLUTE_ORDER,
|
||||
EvictionOrder.RELATIVE_ORDER_EQUAL,
|
||||
EvictionOrder.RELATIVE_ORDER_SPARE,
|
||||
],
|
||||
@@ -572,63 +568,38 @@ def test_partial_evict_tenant(eviction_env: EvictionEnv, order: EvictionOrder):
|
||||
later_tenant_usage < du_by_timeline[tenant]
|
||||
), "all tenants should have lost some layers"
|
||||
|
||||
warm_size = later_du_by_timeline[warm]
|
||||
cold_size = later_du_by_timeline[cold]
|
||||
# with relative order what matters is the amount of layers, with a
|
||||
# fudge factor of whether the eviction bothers tenants with highest
|
||||
# layer count the most. last accessed times between tenants does not
|
||||
# matter.
|
||||
assert order in [EvictionOrder.RELATIVE_ORDER_EQUAL, EvictionOrder.RELATIVE_ORDER_SPARE]
|
||||
layers_now = env.count_layers_per_tenant(env.pageserver)
|
||||
|
||||
if order == EvictionOrder.ABSOLUTE_ORDER:
|
||||
# bounds for warmed_size
|
||||
warm_lower = 0.5 * du_by_timeline[warm]
|
||||
expected_ratio = later_total_on_disk / total_on_disk
|
||||
log.info(
|
||||
f"freed up {100 * expected_ratio}%, expecting the layer counts to decrease in similar ratio"
|
||||
)
|
||||
|
||||
# We don't know exactly whether the cold tenant needs 2 or just 1 env.layer_size wiggle room.
|
||||
# So, check for up to 3 here.
|
||||
warm_upper = warm_lower + 3 * env.layer_size
|
||||
for tenant_id, original_count in tenant_layers.items():
|
||||
count_now = layers_now[tenant_id]
|
||||
ratio = count_now / original_count
|
||||
abs_diff = abs(ratio - expected_ratio)
|
||||
assert original_count > count_now
|
||||
|
||||
cold_upper = 2 * env.layer_size
|
||||
log.info(f"tenants: warm={warm[0]}, cold={cold[0]}")
|
||||
expectation = 0.06
|
||||
log.info(
|
||||
f"expecting for warm tenant: {human_bytes(warm_lower)} < {human_bytes(warm_size)} < {human_bytes(warm_upper)}"
|
||||
f"tenant {tenant_id} layer count {original_count} -> {count_now}, ratio: {ratio}, expecting {abs_diff} < {expectation}"
|
||||
)
|
||||
log.info(f"expecting for cold tenant: {human_bytes(cold_size)} < {human_bytes(cold_upper)}")
|
||||
|
||||
assert warm_size > warm_lower, "warmed up tenant should be at about half size (lower)"
|
||||
assert warm_size < warm_upper, "warmed up tenant should be at about half size (upper)"
|
||||
|
||||
assert (
|
||||
cold_size < cold_upper
|
||||
), "the cold tenant should be evicted to its min_resident_size, i.e., max layer file size"
|
||||
else:
|
||||
# with relative order what matters is the amount of layers, with a
|
||||
# fudge factor of whether the eviction bothers tenants with highest
|
||||
# layer count the most. last accessed times between tenants does not
|
||||
# matter.
|
||||
layers_now = env.count_layers_per_tenant(env.pageserver)
|
||||
|
||||
expected_ratio = later_total_on_disk / total_on_disk
|
||||
log.info(
|
||||
f"freed up {100 * expected_ratio}%, expecting the layer counts to decrease in similar ratio"
|
||||
)
|
||||
|
||||
for tenant_id, original_count in tenant_layers.items():
|
||||
count_now = layers_now[tenant_id]
|
||||
ratio = count_now / original_count
|
||||
abs_diff = abs(ratio - expected_ratio)
|
||||
assert original_count > count_now
|
||||
|
||||
expectation = 0.06
|
||||
log.info(
|
||||
f"tenant {tenant_id} layer count {original_count} -> {count_now}, ratio: {ratio}, expecting {abs_diff} < {expectation}"
|
||||
)
|
||||
# in this test case both relative_spare and relative_equal produce
|
||||
# the same outcomes; this must be a quantization effect of similar
|
||||
# sizes (-s4 and -s6) and small (5MB) layer size.
|
||||
# for pg15 and pg16 the absdiff is < 0.01, for pg14 it is closer to 0.02
|
||||
assert abs_diff < expectation
|
||||
# in this test case both relative_spare and relative_equal produce
|
||||
# the same outcomes; this must be a quantization effect of similar
|
||||
# sizes (-s4 and -s6) and small (5MB) layer size.
|
||||
# for pg15 and pg16 the absdiff is < 0.01, for pg14 it is closer to 0.02
|
||||
assert abs_diff < expectation
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"order",
|
||||
[
|
||||
EvictionOrder.ABSOLUTE_ORDER,
|
||||
EvictionOrder.RELATIVE_ORDER_EQUAL,
|
||||
EvictionOrder.RELATIVE_ORDER_SPARE,
|
||||
],
|
||||
@@ -680,14 +651,7 @@ def test_fast_growing_tenant(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, or
|
||||
), "rest of the assertions expect 3 + 1 timelines, ratios, scales, all in order"
|
||||
log.info(f"{ratios}")
|
||||
|
||||
if order == EvictionOrder.ABSOLUTE_ORDER:
|
||||
# first tenant loses most
|
||||
assert ratios[0] <= ratios[1], "first should lose the most"
|
||||
assert ratios[1] < ratios[2], "second should lose some"
|
||||
assert ratios[1] < 1.0
|
||||
assert ratios[2] <= ratios[3], "third might not lose"
|
||||
assert ratios[3] == 1.0, "tenant created last does not lose"
|
||||
elif order == EvictionOrder.RELATIVE_ORDER_EQUAL:
|
||||
if order == EvictionOrder.RELATIVE_ORDER_EQUAL:
|
||||
assert all([x for x in ratios if x < 1.0]), "all tenants lose layers"
|
||||
elif order == EvictionOrder.RELATIVE_ORDER_SPARE:
|
||||
# with different layer sizes and pg versions, there are different combinations
|
||||
@@ -750,7 +714,7 @@ def test_statvfs_error_handling(eviction_env: EvictionEnv):
|
||||
"type": "Failure",
|
||||
"mocked_error": "EIO",
|
||||
},
|
||||
eviction_order=EvictionOrder.ABSOLUTE_ORDER,
|
||||
eviction_order=EvictionOrder.RELATIVE_ORDER_SPARE,
|
||||
)
|
||||
|
||||
env.neon_env.pageserver.assert_log_contains(".*statvfs failed.*EIO")
|
||||
@@ -784,7 +748,7 @@ def test_statvfs_pressure_usage(eviction_env: EvictionEnv):
|
||||
# This avoids accounting for metadata files & tenant conf in the tests.
|
||||
"name_filter": ".*__.*",
|
||||
},
|
||||
eviction_order=EvictionOrder.ABSOLUTE_ORDER,
|
||||
eviction_order=EvictionOrder.RELATIVE_ORDER_SPARE,
|
||||
)
|
||||
|
||||
wait_until(
|
||||
@@ -837,7 +801,7 @@ def test_statvfs_pressure_min_avail_bytes(eviction_env: EvictionEnv):
|
||||
# This avoids accounting for metadata files & tenant conf in the tests.
|
||||
"name_filter": ".*__.*",
|
||||
},
|
||||
eviction_order=EvictionOrder.ABSOLUTE_ORDER,
|
||||
eviction_order=EvictionOrder.RELATIVE_ORDER_SPARE,
|
||||
)
|
||||
|
||||
wait_until(
|
||||
|
||||
Reference in New Issue
Block a user