feat: bring back min_resident=max(all layers) behavior

This commit is contained in:
Christian Schwarz
2023-03-28 17:20:06 +02:00
parent ea3c76a9d6
commit 85becb148f
2 changed files with 31 additions and 9 deletions

View File

@@ -289,6 +289,7 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
usage_pre: U,
cancel: &CancellationToken,
) -> anyhow::Result<IterationOutcome<U>> {
// use tokio's mutex to get a Sync guard (instead of std::sync::Mutex)
let _g = state
.mutex
.try_lock()
@@ -561,7 +562,13 @@ async fn collect_eviction_candidates(
};
// collect layers from all timelines in this tenant
//
// If one of the timelines becomes `!is_active()` during the iteration,
// for example because we're shutting down, then `max_layer_size` can be too small.
// That's OK. This code only runs under a disk pressure situation, and being
// a little unfair to tenants during shutdown in such a situation is tolerable.
let mut tenant_candidates = Vec::new();
let mut max_layer_size = 0;
for tl in tenant.list_timelines() {
if !tl.is_active() {
continue;
@@ -573,12 +580,24 @@ async fn collect_eviction_candidates(
.into_iter()
.map(|layer_infos| (tl.clone(), layer_infos)),
);
max_layer_size = max_layer_size.max(info.max_layer_size.unwrap_or(0));
if cancel.is_cancelled() {
return Ok(EvictionCandidates::Cancelled);
}
}
// `min_resident_size` defaults to maximum layer file size of the tenant.
// This ensures that each tenant can have at least one layer resident at a given time,
// ensuring forward progress for a single Timeline::get in that tenant.
// It's a questionable heuristic since there are many Timeline::get
// requests going on and multiple layers are needed, and, at least in Neon prod,
// the median layer file size is much smaller than the compaction target size.
// We could be better here, e.g., sum of all L0 layers + most recent L1 layer.
// That's what's typically used by the various background loops.
//
// The default can be overridden with a fixed value in the tenant conf.
// A default override can be put in the default tenant conf in the pageserver.toml.
let min_resident_size = if let Some(s) = tenant.get_min_resident_size_override() {
info!(
"using overridden min resident size {} for tenant {}",
@@ -587,18 +606,12 @@ async fn collect_eviction_candidates(
);
s
} else {
// By default, use the size of the largest resident layer
let s = tenant_candidates
.iter()
.map(|(_, layer_info)| layer_info.file_size())
.max()
.unwrap_or(0);
info!(
"using max layer size {} for tenant {}",
s,
max_layer_size,
tenant.tenant_id()
);
s
max_layer_size
};
// Sort layers most-recently-used first

View File

@@ -4038,6 +4038,8 @@ impl Timeline {
}
pub struct DiskUsageEvictionInfo {
/// Timeline's largest layer (remote or resident)
pub max_layer_size: Option<u64>,
/// Timeline's resident layers
pub resident_layers: Vec<LocalLayerInfoForDiskUsageEviction>,
}
@@ -4070,9 +4072,13 @@ impl Timeline {
pub(crate) fn get_local_layers_for_disk_usage_eviction(&self) -> DiskUsageEvictionInfo {
let layers = self.layers.read().unwrap();
let mut max_layer_size: Option<u64> = None;
let mut resident_layers = Vec::new();
for l in layers.iter_historic_layers() {
let file_size = l.file_size();
max_layer_size = max_layer_size.map_or(Some(file_size), |m| Some(m.max(file_size)));
if l.is_remote_layer() {
continue;
}
@@ -4085,7 +4091,10 @@ impl Timeline {
});
}
DiskUsageEvictionInfo { resident_layers }
DiskUsageEvictionInfo {
max_layer_size,
resident_layers,
}
}
}